// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	unsigned done_io;
	unsigned msg_flags;
	unsigned nr_multishot_loops;
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 buf_group;
	/* per-invocation mshot limit */
	unsigned mshot_len;
	/* overall mshot byte limit */
	unsigned mshot_total_len;
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
 * anyway. Use the upper 8 bits for internal uses.
 */
enum sr_retry_flags {
	IORING_RECV_RETRY = (1U << 15),
	IORING_RECV_PARTIAL_MAP = (1U << 14),
	IORING_RECV_MSHOT_CAP = (1U << 13),
	IORING_RECV_MSHOT_LIM = (1U << 12),
	IORING_RECV_MSHOT_DONE = (1U << 11),

	IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
			       IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

struct io_recvzc {
	struct file *file;
	unsigned msg_flags;
	u16 flags;
	u32 len;
	struct io_zcrx_ifq *ifq;
};

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length);
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length);

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

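/*
 * Whether it makes sense to transparently resume a partially completed
 * send/recv: only if the caller asked for the full amount (MSG_WAITALL)
 * and the socket type preserves ordering across calls. Datagram sockets
 * must not be resumed mid-message.
 */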
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->vec.iovec)
		io_vec_free(&kmsg->vec);
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	io_alloc_cache_vec_kasan(&hdr->vec);
	if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
		io_vec_free(&hdr->vec);

	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
		io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
	if (!hdr)
		return NULL;

	/* If the async data was cached, we might have an iov cached inside. */
	if (hdr->vec.iovec)
		req->flags |= REQ_F_NEED_CLEANUP;
	return hdr;
}

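/*
 * Reset the per-iteration state so the next multishot/bundle round starts
 * fresh: clear the "buffer list empty" hint, forget partial progress, drop
 * the internal retry flags and restore the original per-invocation length.
 */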
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
	sr->len = sr->mshot_len;
}

static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->vec.iovec) {
		nr_segs = iomsg->vec.nr;
		iov = iomsg->vec.iovec;
	} else {
		nr_segs = 1;
		iov = &iomsg->fast_iov;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;

	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	}
	return 0;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct compat_iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	user_access_end();
	return 0;
ua_end:
	user_access_end();
	return -EFAULT;
}

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, umsg);
	if (unlikely(ret))
		return ret;

	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			struct iovec __user *uiov = msg->msg_iov;
			struct iovec tmp_iov;

			if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
				return -EFAULT;
			sr->len = tmp_iov.iov_len;
		}
	}
	return 0;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

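/*
 * Shared setup for IORING_OP_SEND and IORING_OP_SEND_ZC: stash an optional
 * destination address in the async msghdr and import the source buffer,
 * unless it's a registered buffer (imported at issue time) or a provided
 * buffer (selected at issue time).
 */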
static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		req->flags |= REQ_F_IMPORT_BUFFER;
		return 0;
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;

	if (sr->flags & IORING_SEND_VECTORIZED)
		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);

	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct user_msghdr msg;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
	if (unlikely(ret))
		return ret;
	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = kmsg->msg.msg_control_user;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
					 msg.msg_iovlen);
	}
	if (req->flags & REQ_F_BUFFER_SELECT)
		return 0;
	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		req->flags |= REQ_F_MULTISHOT;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	if (unlikely(sqe->addr2 || sqe->file_index))
		return -EINVAL;
	return io_sendmsg_setup(req, sqe);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter and we consumed all of the
 * segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->vec.iovec;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
			      struct io_async_msghdr *kmsg, int len)
{
	req->flags |= REQ_F_BL_NO_RECYCLE;
	if (req->flags & REQ_F_BUFFERS_COMMIT)
		io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
	return IOU_RETRY;
}

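/*
 * Post the completion for a (bundle) send. Returns true when the request is
 * done, false if the bundle should go around again with a fresh buffer
 * selection.
 */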
static inline bool io_send_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = sel->val <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, sel->val, sel->buf_list);
		goto finish;
	}

	cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send on this socket.
	 */
	if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

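/*
 * Pick one or more provided buffers for a send. A single buffer maps to a
 * plain ITER_UBUF; a bundle maps the selected buffers as an iovec-backed
 * iterator, expanding the iovec array as needed.
 */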
static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
				 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct buf_sel_arg arg = {
		.iovs = &kmsg->fast_iov,
		.max_len = min_not_zero(sr->len, INT_MAX),
		.nr_iovs = 1,
		.buf_group = sr->buf_group,
	};
	int ret;

	if (kmsg->vec.iovec) {
		arg.nr_iovs = kmsg->vec.nr;
		arg.iovs = kmsg->vec.iovec;
		arg.mode = KBUF_MODE_FREE;
	}

	if (!(sr->flags & IORING_RECVSEND_BUNDLE))
		arg.nr_iovs = 1;
	else
		arg.mode |= KBUF_MODE_EXPAND;

	ret = io_buffers_select(req, &arg, sel, issue_flags);
	if (unlikely(ret < 0))
		return ret;

	if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
		kmsg->vec.nr = ret;
		kmsg->vec.iovec = arg.iovs;
		req->flags |= REQ_F_NEED_CLEANUP;
	}
	sr->len = arg.out_len;

	if (ret == 1) {
		sr->buf = arg.iovs[0].iov_base;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	} else {
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
			      arg.iovs, ret, arg.out_len);
	}

	return 0;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
		if (ret)
			return ret;
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	sel.val = ret;
	if (!io_send_finish(req, kmsg, &sel))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return sel.val;
}

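/*
 * For multishot recvmsg with provided buffers, each completion writes a
 * struct io_uring_recvmsg_out header (plus name and control space) ahead of
 * the payload. Precompute and sanity check the header size here.
 */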
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
					ITER_DEST);
		if (unlikely(ret))
			return ret;
	}
	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
				     msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (req->flags & REQ_F_BUFFER_SELECT)
			return 0;
		return import_ubuf(ITER_DEST, sr->buf, sr->len,
				   &kmsg->msg.msg_iter);
	}

	return io_recvmsg_copy_hdr(req, kmsg);
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
		       IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT)
		sr->buf_group = req->buf_index;
	sr->mshot_total_len = sr->mshot_len = 0;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV) {
			sr->mshot_len = sr->len;
			sr->mshot_total_len = READ_ONCE(sqe->optlen);
			if (sr->mshot_total_len)
				sr->flags |= IORING_RECV_MSHOT_LIM;
		} else if (sqe->optlen) {
			return -EINVAL;
		}
		req->flags |= REQ_F_APOLL_MULTISHOT;
	} else if (sqe->optlen) {
		return -EINVAL;
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (io_is_compat(req->ctx))
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/* bits to clear in old and inherit in new cflags on bundle retry */
#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req,
				  struct io_async_msghdr *kmsg,
				  struct io_br_sel *sel, bool mshot_finished,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
		/*
		 * If the overall byte limit (mshot_total_len) hits zero, the
		 * limit has been reached. Mark mshot as finished, and flag
		 * MSHOT_DONE as well to prevent a potential bundle from
		 * being retried.
		 */
		sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
		if (!sr->mshot_total_len) {
			sr->flags |= IORING_RECV_MSHOT_DONE;
			mshot_finished = true;
		}
	}

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		size_t this_ret = sel->val - sr->done_io;

		cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
		if (sr->flags & IORING_RECV_RETRY)
			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
		if (sr->mshot_len && sel->val >= sr->mshot_len)
			sr->flags |= IORING_RECV_MSHOT_CAP;
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
		/*
		 * If more is available AND it was a full transfer, retry and
		 * append to this one
		 */
		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
		    !iov_iter_count(&kmsg->msg.msg_iter)) {
			req->cqe.flags = cflags & ~CQE_F_MASK;
			sr->len = kmsg->msg.msg_inq;
			sr->done_io += this_ret;
			sr->flags |= IORING_RECV_RETRY;
			return false;
		}
	} else {
		cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
		sel->val = IOU_RETRY;
		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
				return false;
			}
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			sr->flags &= ~IORING_RECV_MSHOT_CAP;
			if (issue_flags & IO_URING_F_MULTISHOT)
				sel->val = IOU_REQUEUE;
		}
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, sel->val, cflags);
	sel->val = IOU_COMPLETE;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

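/*
 * Carve the multishot recvmsg metadata out of the front of the selected
 * buffer:
 *
 *	| io_uring_recvmsg_out | name (namelen) | control (controllen) | payload |
 *
 * and advance *buf/*len so the socket receive lands in the payload area.
 */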
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel.addr)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
			if (ret) {
				io_kbuf_recycle(req, sel.buf_list, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      struct io_br_sel *sel, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}

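/*
 * Zero-copy receive: payloads are delivered into the memory area registered
 * with the zcrx interface queue, so the request carries no user buffer and
 * every data completion is posted as an aux CQE.
 */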
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	unsigned ifq_idx;

	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
		return -EINVAL;

	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
	if (!zc->ifq)
		return -EINVAL;

	zc->len = READ_ONCE(sqe->len);
	zc->flags = READ_ONCE(sqe->ioprio);
	zc->msg_flags = READ_ONCE(sqe->msg_flags);
	if (zc->msg_flags)
		return -EINVAL;
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* multishot required */
	if (!(zc->flags & IORING_RECV_MULTISHOT))
		return -EINVAL;
	/* All data completions are posted as aux CQEs. */
	req->flags |= REQ_F_APOLL_MULTISHOT;

	return 0;
}

int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
	struct socket *sock;
	unsigned int len;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	len = zc->len;
	ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
			   issue_flags, &zc->len);
	if (len && zc->len == 0) {
		io_req_set_res(req, 0, 0);

		return IOU_COMPLETE;
	}
	if (unlikely(ret <= 0) && ret != -EAGAIN) {
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret == IOU_REQUEUE)
			return IOU_REQUEUE;

		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		return IOU_COMPLETE;
	}
	return IOU_RETRY;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
			    IORING_SEND_VECTORIZED)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(req->ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

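/*
 * Fill skb frags straight from the bvec-backed iterator, without taking page
 * references: the registered buffer keeps the pages pinned, so the frags are
 * marked as managed. Fall back to the generic reference-taking path if the
 * skb already carries unmanaged frags.
 */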
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

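/*
 * Resolve the registered buffer at issue time. The buffer is imported on
 * behalf of the notification request, so the underlying pages stay pinned
 * until the NOTIF CQE has been posted.
 */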
static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));

	sr->notif->buf_index = req->buf_index;
	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
				 (u64)(uintptr_t)sr->buf, sr->len,
				 ITER_SOURCE, issue_flags);
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		req->flags &= ~REQ_F_IMPORT_BUFFER;
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	if (req->flags & REQ_F_IMPORT_BUFFER) {
		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
		int ret;

		ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
					&kmsg->vec, uvec_segs, issue_flags);
		if (unlikely(ret))
			return ret;
		req->flags &= ~REQ_F_IMPORT_BUFFER;
	}

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
		      IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

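/*
 * For multishot accept, keep accepting and posting aux CQEs until the accept
 * queue is drained (or would block); a plain accept completes after one
 * connection.
 */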
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

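/*
 * Nonblocking connect state machine: a first -EINPROGRESS marks the request
 * as in progress and retries (arming poll); later attempts read the final
 * status via sock_error(). -ECONNABORTED is retried once before being
 * reported.
 */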
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	io_vec_free(&kmsg->vec);
	kfree(kmsg);
}