1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/slab.h>
6 #include <linux/net.h>
7 #include <linux/compat.h>
8 #include <net/compat.h>
9 #include <linux/io_uring.h>
10
11 #include <uapi/linux/io_uring.h>
12
13 #include "io_uring.h"
14 #include "kbuf.h"
15 #include "alloc_cache.h"
16 #include "net.h"
17 #include "notif.h"
18 #include "rsrc.h"
19
20 #if defined(CONFIG_NET)
/* Per-request command state for the shutdown opcode. */
struct io_shutdown {
	struct file	*file;
	/* taken from sqe->len at prep time, handed to __sys_shutdown_sock() */
	int		how;
};
25
26 struct io_accept {
27 struct file *file;
28 struct sockaddr __user *addr;
29 int __user *addr_len;
30 int flags;
31 u32 file_slot;
32 unsigned long nofile;
33 };
34
35 struct io_socket {
36 struct file *file;
37 int domain;
38 int type;
39 int protocol;
40 int flags;
41 u32 file_slot;
42 unsigned long nofile;
43 };
44
45 struct io_connect {
46 struct file *file;
47 struct sockaddr __user *addr;
48 int addr_len;
49 bool in_progress;
50 bool seen_econnaborted;
51 };
52
53 struct io_sr_msg {
54 struct file *file;
55 union {
56 struct compat_msghdr __user *umsg_compat;
57 struct user_msghdr __user *umsg;
58 void __user *buf;
59 };
60 unsigned len;
61 unsigned done_io;
62 unsigned msg_flags;
63 unsigned nr_multishot_loops;
64 u16 flags;
65 /* initialised and used only by !msg send variants */
66 u16 addr_len;
67 u16 buf_group;
68 void __user *addr;
69 void __user *msg_control;
70 /* used only for send zerocopy */
71 struct io_kiocb *notif;
72 };
73
/*
 * Upper bound on back-to-back inline receive attempts for one multishot
 * request while the socket still reports pending data. Once the bound is
 * hit the request is requeued at the back of the queue and retried from
 * there, which keeps one flooding client from starving the others.
 */
#define MULTISHOT_MAX_RETRY	32
80
io_check_multishot(struct io_kiocb * req,unsigned int issue_flags)81 static inline bool io_check_multishot(struct io_kiocb *req,
82 unsigned int issue_flags)
83 {
84 /*
85 * When ->locked_cq is set we only allow to post CQEs from the original
86 * task context. Usual request completions will be handled in other
87 * generic paths but multipoll may decide to post extra cqes.
88 */
89 return !(issue_flags & IO_URING_F_IOWQ) ||
90 !(req->flags & REQ_F_APOLL_MULTISHOT) ||
91 !req->ctx->task_complete;
92 }
93
io_shutdown_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)94 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
95 {
96 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
97
98 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
99 sqe->buf_index || sqe->splice_fd_in))
100 return -EINVAL;
101
102 shutdown->how = READ_ONCE(sqe->len);
103 req->flags |= REQ_F_FORCE_ASYNC;
104 return 0;
105 }
106
io_shutdown(struct io_kiocb * req,unsigned int issue_flags)107 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
108 {
109 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
110 struct socket *sock;
111 int ret;
112
113 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
114
115 sock = sock_from_file(req->file);
116 if (unlikely(!sock))
117 return -ENOTSOCK;
118
119 ret = __sys_shutdown_sock(sock, shutdown->how);
120 io_req_set_res(req, ret, 0);
121 return IOU_OK;
122 }
123
io_net_retry(struct socket * sock,int flags)124 static bool io_net_retry(struct socket *sock, int flags)
125 {
126 if (!(flags & MSG_WAITALL))
127 return false;
128 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
129 }
130
io_netmsg_recycle(struct io_kiocb * req,unsigned int issue_flags)131 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
132 {
133 struct io_async_msghdr *hdr = req->async_data;
134
135 if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
136 return;
137
138 /* Let normal cleanup path reap it if we fail adding to the cache */
139 if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
140 req->async_data = NULL;
141 req->flags &= ~REQ_F_ASYNC_DATA;
142 }
143 }
144
io_msg_alloc_async(struct io_kiocb * req,unsigned int issue_flags)145 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
146 unsigned int issue_flags)
147 {
148 struct io_ring_ctx *ctx = req->ctx;
149 struct io_cache_entry *entry;
150 struct io_async_msghdr *hdr;
151
152 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
153 entry = io_alloc_cache_get(&ctx->netmsg_cache);
154 if (entry) {
155 hdr = container_of(entry, struct io_async_msghdr, cache);
156 hdr->free_iov = NULL;
157 req->flags |= REQ_F_ASYNC_DATA;
158 req->async_data = hdr;
159 return hdr;
160 }
161 }
162
163 if (!io_alloc_async_data(req)) {
164 hdr = req->async_data;
165 hdr->free_iov = NULL;
166 return hdr;
167 }
168 return NULL;
169 }
170
/*
 * Allocation helper for the ->prep_async handlers. Those always run from
 * the submission context, so no issue flags (in particular not
 * IO_URING_F_UNLOCKED) are passed on.
 */
static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	const unsigned int submit_ctx_flags = 0;

	return io_msg_alloc_async(req, submit_ctx_flags);
}
176
io_setup_async_msg(struct io_kiocb * req,struct io_async_msghdr * kmsg,unsigned int issue_flags)177 static int io_setup_async_msg(struct io_kiocb *req,
178 struct io_async_msghdr *kmsg,
179 unsigned int issue_flags)
180 {
181 struct io_async_msghdr *async_msg;
182
183 if (req_has_async_data(req))
184 return -EAGAIN;
185 async_msg = io_msg_alloc_async(req, issue_flags);
186 if (!async_msg) {
187 kfree(kmsg->free_iov);
188 return -ENOMEM;
189 }
190 req->flags |= REQ_F_NEED_CLEANUP;
191 memcpy(async_msg, kmsg, sizeof(*kmsg));
192 if (async_msg->msg.msg_name)
193 async_msg->msg.msg_name = &async_msg->addr;
194
195 if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
196 return -EAGAIN;
197
198 /* if were using fast_iov, set it to the new one */
199 if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
200 size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
201 async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
202 }
203
204 return -EAGAIN;
205 }
206
#ifdef CONFIG_COMPAT
/*
 * Copy a compat msghdr from userspace into @msg and set up the data iovec.
 *
 * With REQ_F_BUFFER_SELECT at most one iovec is accepted and only its
 * length is read (into sr->len); ->free_iov is set to NULL and no iterator
 * is built. Otherwise the iovec array is imported into iomsg->msg.msg_iter
 * in direction @ddir.
 *
 * Returns 0 on success, -EFAULT on a failed user access, -EINVAL for more
 * than one iovec or a negative length in buffer-select mode, or the error
 * from __import_iovec().
 */
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	compat_ssize_t clen;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;
	uiov = compat_ptr(msg->msg_iov);

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		int ret;

		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(ddir, (struct iovec __user *)uiov,
				     msg->msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     true);
		if (unlikely(ret < 0))
			return ret;
		return 0;
	}

	/* provided buffers: the data pointer comes from the buffer group */
	iomsg->free_iov = NULL;
	if (msg->msg_iovlen > 1)
		return -EINVAL;
	if (msg->msg_iovlen == 0) {
		sr->len = 0;
		return 0;
	}

	if (!access_ok(uiov, sizeof(*uiov)))
		return -EFAULT;
	if (__get_user(clen, &uiov->iov_len))
		return -EFAULT;
	if (clen < 0)
		return -EINVAL;
	sr->len = clen;
	return 0;
}
#endif
251
io_msg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg,struct user_msghdr * msg,int ddir)252 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
253 struct user_msghdr *msg, int ddir)
254 {
255 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
256 int ret;
257
258 if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg)))
259 return -EFAULT;
260
261 if (req->flags & REQ_F_BUFFER_SELECT) {
262 if (msg->msg_iovlen == 0) {
263 sr->len = iomsg->fast_iov[0].iov_len = 0;
264 iomsg->fast_iov[0].iov_base = NULL;
265 iomsg->free_iov = NULL;
266 } else if (msg->msg_iovlen > 1) {
267 return -EINVAL;
268 } else {
269 if (copy_from_user(iomsg->fast_iov, msg->msg_iov,
270 sizeof(*msg->msg_iov)))
271 return -EFAULT;
272 sr->len = iomsg->fast_iov[0].iov_len;
273 iomsg->free_iov = NULL;
274 }
275
276 return 0;
277 }
278
279 iomsg->free_iov = iomsg->fast_iov;
280 ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
281 &iomsg->free_iov, &iomsg->msg.msg_iter, false);
282 if (unlikely(ret < 0))
283 return ret;
284
285 return 0;
286 }
287
io_sendmsg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg)288 static int io_sendmsg_copy_hdr(struct io_kiocb *req,
289 struct io_async_msghdr *iomsg)
290 {
291 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
292 struct user_msghdr msg;
293 int ret;
294
295 iomsg->msg.msg_name = &iomsg->addr;
296 iomsg->msg.msg_iter.nr_segs = 0;
297
298 #ifdef CONFIG_COMPAT
299 if (unlikely(req->ctx->compat)) {
300 struct compat_msghdr cmsg;
301
302 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
303 if (unlikely(ret))
304 return ret;
305
306 return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
307 }
308 #endif
309
310 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
311 if (unlikely(ret))
312 return ret;
313
314 ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
315
316 /* save msg_control as sys_sendmsg() overwrites it */
317 sr->msg_control = iomsg->msg.msg_control_user;
318 return ret;
319 }
320
io_send_prep_async(struct io_kiocb * req)321 int io_send_prep_async(struct io_kiocb *req)
322 {
323 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
324 struct io_async_msghdr *io;
325 int ret;
326
327 if (!zc->addr || req_has_async_data(req))
328 return 0;
329 io = io_msg_alloc_async_prep(req);
330 if (!io)
331 return -ENOMEM;
332 ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
333 return ret;
334 }
335
io_setup_async_addr(struct io_kiocb * req,struct sockaddr_storage * addr_storage,unsigned int issue_flags)336 static int io_setup_async_addr(struct io_kiocb *req,
337 struct sockaddr_storage *addr_storage,
338 unsigned int issue_flags)
339 {
340 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
341 struct io_async_msghdr *io;
342
343 if (!sr->addr || req_has_async_data(req))
344 return -EAGAIN;
345 io = io_msg_alloc_async(req, issue_flags);
346 if (!io)
347 return -ENOMEM;
348 memcpy(&io->addr, addr_storage, sizeof(io->addr));
349 return -EAGAIN;
350 }
351
io_sendmsg_prep_async(struct io_kiocb * req)352 int io_sendmsg_prep_async(struct io_kiocb *req)
353 {
354 int ret;
355
356 if (!io_msg_alloc_async_prep(req))
357 return -ENOMEM;
358 ret = io_sendmsg_copy_hdr(req, req->async_data);
359 if (!ret)
360 req->flags |= REQ_F_NEED_CLEANUP;
361 return ret;
362 }
363
io_sendmsg_recvmsg_cleanup(struct io_kiocb * req)364 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
365 {
366 struct io_async_msghdr *io = req->async_data;
367
368 kfree(io->free_iov);
369 }
370
io_sendmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)371 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
372 {
373 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
374
375 if (req->opcode == IORING_OP_SEND) {
376 if (READ_ONCE(sqe->__pad3[0]))
377 return -EINVAL;
378 sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
379 sr->addr_len = READ_ONCE(sqe->addr_len);
380 } else if (sqe->addr2 || sqe->file_index) {
381 return -EINVAL;
382 }
383
384 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
385 sr->len = READ_ONCE(sqe->len);
386 sr->flags = READ_ONCE(sqe->ioprio);
387 if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
388 return -EINVAL;
389 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
390 if (sr->msg_flags & MSG_DONTWAIT)
391 req->flags |= REQ_F_NOWAIT;
392
393 #ifdef CONFIG_COMPAT
394 if (req->ctx->compat)
395 sr->msg_flags |= MSG_CMSG_COMPAT;
396 #endif
397 sr->done_io = 0;
398 return 0;
399 }
400
io_sendmsg(struct io_kiocb * req,unsigned int issue_flags)401 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
402 {
403 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
404 struct io_async_msghdr iomsg, *kmsg;
405 struct socket *sock;
406 unsigned flags;
407 int min_ret = 0;
408 int ret;
409
410 sock = sock_from_file(req->file);
411 if (unlikely(!sock))
412 return -ENOTSOCK;
413
414 if (req_has_async_data(req)) {
415 kmsg = req->async_data;
416 kmsg->msg.msg_control_user = sr->msg_control;
417 } else {
418 ret = io_sendmsg_copy_hdr(req, &iomsg);
419 if (ret)
420 return ret;
421 kmsg = &iomsg;
422 }
423
424 if (!(req->flags & REQ_F_POLLED) &&
425 (sr->flags & IORING_RECVSEND_POLL_FIRST))
426 return io_setup_async_msg(req, kmsg, issue_flags);
427
428 flags = sr->msg_flags;
429 if (issue_flags & IO_URING_F_NONBLOCK)
430 flags |= MSG_DONTWAIT;
431 if (flags & MSG_WAITALL)
432 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
433
434 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
435
436 if (ret < min_ret) {
437 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
438 return io_setup_async_msg(req, kmsg, issue_flags);
439 if (ret > 0 && io_net_retry(sock, flags)) {
440 kmsg->msg.msg_controllen = 0;
441 kmsg->msg.msg_control = NULL;
442 sr->done_io += ret;
443 req->flags |= REQ_F_PARTIAL_IO;
444 return io_setup_async_msg(req, kmsg, issue_flags);
445 }
446 if (ret == -ERESTARTSYS)
447 ret = -EINTR;
448 req_set_fail(req);
449 }
450 /* fast path, check for non-NULL to avoid function call */
451 if (kmsg->free_iov)
452 kfree(kmsg->free_iov);
453 req->flags &= ~REQ_F_NEED_CLEANUP;
454 io_netmsg_recycle(req, issue_flags);
455 if (ret >= 0)
456 ret += sr->done_io;
457 else if (sr->done_io)
458 ret = sr->done_io;
459 io_req_set_res(req, ret, 0);
460 return IOU_OK;
461 }
462
io_send(struct io_kiocb * req,unsigned int issue_flags)463 int io_send(struct io_kiocb *req, unsigned int issue_flags)
464 {
465 struct sockaddr_storage __address;
466 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
467 struct msghdr msg;
468 struct socket *sock;
469 unsigned flags;
470 int min_ret = 0;
471 int ret;
472
473 msg.msg_name = NULL;
474 msg.msg_control = NULL;
475 msg.msg_controllen = 0;
476 msg.msg_namelen = 0;
477 msg.msg_ubuf = NULL;
478
479 if (sr->addr) {
480 if (req_has_async_data(req)) {
481 struct io_async_msghdr *io = req->async_data;
482
483 msg.msg_name = &io->addr;
484 } else {
485 ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
486 if (unlikely(ret < 0))
487 return ret;
488 msg.msg_name = (struct sockaddr *)&__address;
489 }
490 msg.msg_namelen = sr->addr_len;
491 }
492
493 if (!(req->flags & REQ_F_POLLED) &&
494 (sr->flags & IORING_RECVSEND_POLL_FIRST))
495 return io_setup_async_addr(req, &__address, issue_flags);
496
497 sock = sock_from_file(req->file);
498 if (unlikely(!sock))
499 return -ENOTSOCK;
500
501 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
502 if (unlikely(ret))
503 return ret;
504
505 flags = sr->msg_flags;
506 if (issue_flags & IO_URING_F_NONBLOCK)
507 flags |= MSG_DONTWAIT;
508 if (flags & MSG_WAITALL)
509 min_ret = iov_iter_count(&msg.msg_iter);
510
511 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
512 msg.msg_flags = flags;
513 ret = sock_sendmsg(sock, &msg);
514 if (ret < min_ret) {
515 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
516 return io_setup_async_addr(req, &__address, issue_flags);
517
518 if (ret > 0 && io_net_retry(sock, flags)) {
519 sr->len -= ret;
520 sr->buf += ret;
521 sr->done_io += ret;
522 req->flags |= REQ_F_PARTIAL_IO;
523 return io_setup_async_addr(req, &__address, issue_flags);
524 }
525 if (ret == -ERESTARTSYS)
526 ret = -EINTR;
527 req_set_fail(req);
528 }
529 if (ret >= 0)
530 ret += sr->done_io;
531 else if (sr->done_io)
532 ret = sr->done_io;
533 io_req_set_res(req, ret, 0);
534 return IOU_OK;
535 }
536
/*
 * Record the name and control lengths for a multishot recvmsg that uses
 * provided buffers.
 *
 * Only acts when both REQ_F_APOLL_MULTISHOT and REQ_F_BUFFER_SELECT are
 * set; for any other request it returns 0 without touching @iomsg. A
 * negative @namelen, or a total of recvmsg_out header + name + control
 * that does not fit in an int, yields -EOVERFLOW.
 */
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	int hdr;

	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) !=
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT))
		return 0;

	if (unlikely(namelen < 0))
		return -EOVERFLOW;
	if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
			       namelen, &hdr) ||
	    check_add_overflow(hdr, controllen, &hdr))
		return -EOVERFLOW;

	iomsg->controllen = controllen;
	iomsg->namelen = namelen;
	return 0;
}
560
io_recvmsg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg)561 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
562 struct io_async_msghdr *iomsg)
563 {
564 struct user_msghdr msg;
565 int ret;
566
567 iomsg->msg.msg_name = &iomsg->addr;
568 iomsg->msg.msg_iter.nr_segs = 0;
569
570 #ifdef CONFIG_COMPAT
571 if (unlikely(req->ctx->compat)) {
572 struct compat_msghdr cmsg;
573
574 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
575 if (unlikely(ret))
576 return ret;
577
578 ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
579 if (unlikely(ret))
580 return ret;
581
582 return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
583 cmsg.msg_controllen);
584 }
585 #endif
586
587 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
588 if (unlikely(ret))
589 return ret;
590
591 ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
592 if (unlikely(ret))
593 return ret;
594
595 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
596 msg.msg_controllen);
597 }
598
io_recvmsg_prep_async(struct io_kiocb * req)599 int io_recvmsg_prep_async(struct io_kiocb *req)
600 {
601 struct io_async_msghdr *iomsg;
602 int ret;
603
604 if (!io_msg_alloc_async_prep(req))
605 return -ENOMEM;
606 iomsg = req->async_data;
607 ret = io_recvmsg_copy_hdr(req, iomsg);
608 if (!ret)
609 req->flags |= REQ_F_NEED_CLEANUP;
610 return ret;
611 }
612
/* sqe->ioprio flags that io_recvmsg_prep() accepts */
#define RECVMSG_FLAGS	(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)
614
io_recvmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)615 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
616 {
617 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
618
619 if (unlikely(sqe->file_index || sqe->addr2))
620 return -EINVAL;
621
622 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
623 sr->len = READ_ONCE(sqe->len);
624 sr->flags = READ_ONCE(sqe->ioprio);
625 if (sr->flags & ~(RECVMSG_FLAGS))
626 return -EINVAL;
627 sr->msg_flags = READ_ONCE(sqe->msg_flags);
628 if (sr->msg_flags & MSG_DONTWAIT)
629 req->flags |= REQ_F_NOWAIT;
630 if (sr->msg_flags & MSG_ERRQUEUE)
631 req->flags |= REQ_F_CLEAR_POLLIN;
632 if (sr->flags & IORING_RECV_MULTISHOT) {
633 if (!(req->flags & REQ_F_BUFFER_SELECT))
634 return -EINVAL;
635 if (sr->msg_flags & MSG_WAITALL)
636 return -EINVAL;
637 if (req->opcode == IORING_OP_RECV && sr->len)
638 return -EINVAL;
639 req->flags |= REQ_F_APOLL_MULTISHOT;
640 /*
641 * Store the buffer group for this multishot receive separately,
642 * as if we end up doing an io-wq based issue that selects a
643 * buffer, it has to be committed immediately and that will
644 * clear ->buf_list. This means we lose the link to the buffer
645 * list, and the eventual buffer put on completion then cannot
646 * restore it.
647 */
648 sr->buf_group = req->buf_index;
649 }
650
651 #ifdef CONFIG_COMPAT
652 if (req->ctx->compat)
653 sr->msg_flags |= MSG_CMSG_COMPAT;
654 #endif
655 sr->done_io = 0;
656 sr->nr_multishot_loops = 0;
657 return 0;
658 }
659
io_recv_prep_retry(struct io_kiocb * req)660 static inline void io_recv_prep_retry(struct io_kiocb *req)
661 {
662 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
663
664 sr->done_io = 0;
665 sr->len = 0; /* get from the provided buffer */
666 req->buf_index = sr->buf_group;
667 }
668
669 /*
670 * Finishes io_recv and io_recvmsg.
671 *
672 * Returns true if it is actually finished, or false if it should run
673 * again (for multishot).
674 */
io_recv_finish(struct io_kiocb * req,int * ret,struct msghdr * msg,bool mshot_finished,unsigned issue_flags)675 static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
676 struct msghdr *msg, bool mshot_finished,
677 unsigned issue_flags)
678 {
679 unsigned int cflags;
680
681 cflags = io_put_kbuf(req, issue_flags);
682 if (msg->msg_inq && msg->msg_inq != -1)
683 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
684
685 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
686 io_req_set_res(req, *ret, cflags);
687 *ret = IOU_OK;
688 return true;
689 }
690
691 if (mshot_finished)
692 goto finish;
693
694 /*
695 * Fill CQE for this receive and see if we should keep trying to
696 * receive from this socket.
697 */
698 if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
699 *ret, cflags | IORING_CQE_F_MORE)) {
700 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
701 int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
702
703 io_recv_prep_retry(req);
704 /* Known not-empty or unknown state, retry */
705 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
706 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
707 return false;
708 /* mshot retries exceeded, force a requeue */
709 sr->nr_multishot_loops = 0;
710 mshot_retry_ret = IOU_REQUEUE;
711 }
712 if (issue_flags & IO_URING_F_MULTISHOT)
713 *ret = mshot_retry_ret;
714 else
715 *ret = -EAGAIN;
716 return true;
717 }
718 /* Otherwise stop multishot but use the current result. */
719 finish:
720 io_req_set_res(req, *ret, cflags);
721
722 if (issue_flags & IO_URING_F_MULTISHOT)
723 *ret = IOU_STOP_MULTISHOT;
724 else
725 *ret = IOU_OK;
726 return true;
727 }
728
/*
 * Carve the multishot recvmsg header area out of a selected buffer.
 *
 * The start of the buffer is reserved for a struct io_uring_recvmsg_out
 * followed by kmsg->namelen and kmsg->controllen bytes. If control data is
 * expected, the message's user control pointer is aimed at the tail of
 * that reserved area. The original buffer start is stashed in sr->buf for
 * the later header copy, and *buf / *len are advanced past the reserved
 * area so they describe the payload space, which is also recorded in
 * kmsg->payloadlen.
 *
 * Returns 0, or -EFAULT if the buffer is too small for the header area.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long base = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = base + hdr - kmsg->controllen;

		kmsg->msg.msg_controllen = kmsg->controllen;
		kmsg->msg.msg_control_user = (void __user *) control;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (base + hdr);
	*len = *len - hdr;
	kmsg->payloadlen = *len;
	return 0;
}
753
754 struct io_recvmsg_multishot_hdr {
755 struct io_uring_recvmsg_out msg;
756 struct sockaddr_storage addr;
757 };
758
/*
 * Perform one receive of a multishot recvmsg and write the result header
 * to the start of the provided buffer (io->buf).
 *
 * *finished is set when the receive returned <= 0 or the header could not
 * be copied out.
 *
 * Returns a negative errno, or the number of buffer bytes consumed: the
 * recvmsg_out header plus the reserved name and control areas plus the
 * payload, which is clamped to the payload space.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	struct io_recvmsg_multishot_hdr hdr;
	int received;
	int hdr_bytes;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	received = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = received <= 0;
	if (received < 0)
		return received;

	/*
	 * payloadlen reports what the socket returned, before clamping.
	 * "fromlen shall refer to the value before truncation.." 1003.1g
	 */
	hdr.msg = (struct io_uring_recvmsg_out) {
		.namelen = kmsg->msg.msg_namelen,
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.payloadlen = received,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	if (received > kmsg->payloadlen)
		received = kmsg->payloadlen;

	/* copy out the descriptor plus the (possibly truncated) address */
	hdr_bytes = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		hdr_bytes += kmsg->namelen;
	else
		hdr_bytes += kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, hdr_bytes)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + received;
}
812
io_recvmsg(struct io_kiocb * req,unsigned int issue_flags)813 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
814 {
815 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
816 struct io_async_msghdr iomsg, *kmsg;
817 struct socket *sock;
818 unsigned flags;
819 int ret, min_ret = 0;
820 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
821 bool mshot_finished = true;
822
823 sock = sock_from_file(req->file);
824 if (unlikely(!sock))
825 return -ENOTSOCK;
826
827 if (req_has_async_data(req)) {
828 kmsg = req->async_data;
829 } else {
830 ret = io_recvmsg_copy_hdr(req, &iomsg);
831 if (ret)
832 return ret;
833 kmsg = &iomsg;
834 }
835
836 if (!(req->flags & REQ_F_POLLED) &&
837 (sr->flags & IORING_RECVSEND_POLL_FIRST))
838 return io_setup_async_msg(req, kmsg, issue_flags);
839
840 if (!io_check_multishot(req, issue_flags))
841 return io_setup_async_msg(req, kmsg, issue_flags);
842
843 retry_multishot:
844 if (io_do_buffer_select(req)) {
845 void __user *buf;
846 size_t len = sr->len;
847
848 buf = io_buffer_select(req, &len, issue_flags);
849 if (!buf)
850 return -ENOBUFS;
851
852 if (req->flags & REQ_F_APOLL_MULTISHOT) {
853 ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
854 if (ret) {
855 io_kbuf_recycle(req, issue_flags);
856 return ret;
857 }
858 }
859
860 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
861 }
862
863 flags = sr->msg_flags;
864 if (force_nonblock)
865 flags |= MSG_DONTWAIT;
866
867 kmsg->msg.msg_get_inq = 1;
868 kmsg->msg.msg_inq = -1;
869 if (req->flags & REQ_F_APOLL_MULTISHOT) {
870 ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
871 &mshot_finished);
872 } else {
873 /* disable partial retry for recvmsg with cmsg attached */
874 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
875 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
876
877 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
878 kmsg->uaddr, flags);
879 }
880
881 if (ret < min_ret) {
882 if (ret == -EAGAIN && force_nonblock) {
883 ret = io_setup_async_msg(req, kmsg, issue_flags);
884 if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
885 io_kbuf_recycle(req, issue_flags);
886 return IOU_ISSUE_SKIP_COMPLETE;
887 }
888 return ret;
889 }
890 if (ret > 0 && io_net_retry(sock, flags)) {
891 sr->done_io += ret;
892 req->flags |= REQ_F_PARTIAL_IO;
893 return io_setup_async_msg(req, kmsg, issue_flags);
894 }
895 if (ret == -ERESTARTSYS)
896 ret = -EINTR;
897 req_set_fail(req);
898 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
899 req_set_fail(req);
900 }
901
902 if (ret > 0)
903 ret += sr->done_io;
904 else if (sr->done_io)
905 ret = sr->done_io;
906 else
907 io_kbuf_recycle(req, issue_flags);
908
909 if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
910 goto retry_multishot;
911
912 if (mshot_finished) {
913 /* fast path, check for non-NULL to avoid function call */
914 if (kmsg->free_iov)
915 kfree(kmsg->free_iov);
916 io_netmsg_recycle(req, issue_flags);
917 req->flags &= ~REQ_F_NEED_CLEANUP;
918 } else if (ret == -EAGAIN)
919 return io_setup_async_msg(req, kmsg, issue_flags);
920
921 return ret;
922 }
923
io_recv(struct io_kiocb * req,unsigned int issue_flags)924 int io_recv(struct io_kiocb *req, unsigned int issue_flags)
925 {
926 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
927 struct msghdr msg;
928 struct socket *sock;
929 unsigned flags;
930 int ret, min_ret = 0;
931 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
932 size_t len = sr->len;
933 bool mshot_finished;
934
935 if (!(req->flags & REQ_F_POLLED) &&
936 (sr->flags & IORING_RECVSEND_POLL_FIRST))
937 return -EAGAIN;
938
939 if (!io_check_multishot(req, issue_flags))
940 return -EAGAIN;
941
942 sock = sock_from_file(req->file);
943 if (unlikely(!sock))
944 return -ENOTSOCK;
945
946 msg.msg_name = NULL;
947 msg.msg_namelen = 0;
948 msg.msg_control = NULL;
949 msg.msg_get_inq = 1;
950 msg.msg_controllen = 0;
951 msg.msg_iocb = NULL;
952 msg.msg_ubuf = NULL;
953
954 retry_multishot:
955 if (io_do_buffer_select(req)) {
956 void __user *buf;
957
958 buf = io_buffer_select(req, &len, issue_flags);
959 if (!buf)
960 return -ENOBUFS;
961 sr->buf = buf;
962 sr->len = len;
963 }
964
965 ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
966 if (unlikely(ret))
967 goto out_free;
968
969 msg.msg_inq = -1;
970 msg.msg_flags = 0;
971
972 flags = sr->msg_flags;
973 if (force_nonblock)
974 flags |= MSG_DONTWAIT;
975 if (flags & MSG_WAITALL)
976 min_ret = iov_iter_count(&msg.msg_iter);
977
978 ret = sock_recvmsg(sock, &msg, flags);
979 if (ret < min_ret) {
980 if (ret == -EAGAIN && force_nonblock) {
981 if (issue_flags & IO_URING_F_MULTISHOT) {
982 io_kbuf_recycle(req, issue_flags);
983 return IOU_ISSUE_SKIP_COMPLETE;
984 }
985
986 return -EAGAIN;
987 }
988 if (ret > 0 && io_net_retry(sock, flags)) {
989 sr->len -= ret;
990 sr->buf += ret;
991 sr->done_io += ret;
992 req->flags |= REQ_F_PARTIAL_IO;
993 return -EAGAIN;
994 }
995 if (ret == -ERESTARTSYS)
996 ret = -EINTR;
997 req_set_fail(req);
998 } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
999 out_free:
1000 req_set_fail(req);
1001 }
1002
1003 mshot_finished = ret <= 0;
1004 if (ret > 0)
1005 ret += sr->done_io;
1006 else if (sr->done_io)
1007 ret = sr->done_io;
1008 else
1009 io_kbuf_recycle(req, issue_flags);
1010
1011 if (!io_recv_finish(req, &ret, &msg, mshot_finished, issue_flags))
1012 goto retry_multishot;
1013
1014 return ret;
1015 }
1016
io_send_zc_cleanup(struct io_kiocb * req)1017 void io_send_zc_cleanup(struct io_kiocb *req)
1018 {
1019 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1020 struct io_async_msghdr *io;
1021
1022 if (req_has_async_data(req)) {
1023 io = req->async_data;
1024 /* might be ->fast_iov if *msg_copy_hdr failed */
1025 if (io->free_iov != io->fast_iov)
1026 kfree(io->free_iov);
1027 }
1028 if (zc->notif) {
1029 io_notif_flush(zc->notif);
1030 zc->notif = NULL;
1031 }
1032 }
1033
/* ioprio flags that need no extra handling in io_send_zc_prep() */
#define IO_ZC_FLAGS_COMMON	(IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
/* every ioprio flag io_send_zc_prep() accepts */
#define IO_ZC_FLAGS_VALID	(IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
1036
io_send_zc_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1037 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1038 {
1039 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1040 struct io_ring_ctx *ctx = req->ctx;
1041 struct io_kiocb *notif;
1042
1043 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1044 return -EINVAL;
1045 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1046 if (req->flags & REQ_F_CQE_SKIP)
1047 return -EINVAL;
1048
1049 notif = zc->notif = io_alloc_notif(ctx);
1050 if (!notif)
1051 return -ENOMEM;
1052 notif->cqe.user_data = req->cqe.user_data;
1053 notif->cqe.res = 0;
1054 notif->cqe.flags = IORING_CQE_F_NOTIF;
1055 req->flags |= REQ_F_NEED_CLEANUP;
1056
1057 zc->flags = READ_ONCE(sqe->ioprio);
1058 if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1059 if (zc->flags & ~IO_ZC_FLAGS_VALID)
1060 return -EINVAL;
1061 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1062 io_notif_set_extended(notif);
1063 io_notif_to_data(notif)->zc_report = true;
1064 }
1065 }
1066
1067 if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1068 unsigned idx = READ_ONCE(sqe->buf_index);
1069
1070 if (unlikely(idx >= ctx->nr_user_bufs))
1071 return -EFAULT;
1072 idx = array_index_nospec(idx, ctx->nr_user_bufs);
1073 req->imu = READ_ONCE(ctx->user_bufs[idx]);
1074 io_req_set_rsrc_node(notif, ctx, 0);
1075 }
1076
1077 if (req->opcode == IORING_OP_SEND_ZC) {
1078 if (READ_ONCE(sqe->__pad3[0]))
1079 return -EINVAL;
1080 zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1081 zc->addr_len = READ_ONCE(sqe->addr_len);
1082 } else {
1083 if (unlikely(sqe->addr2 || sqe->file_index))
1084 return -EINVAL;
1085 if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1086 return -EINVAL;
1087 }
1088
1089 zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
1090 zc->len = READ_ONCE(sqe->len);
1091 zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
1092 if (zc->msg_flags & MSG_DONTWAIT)
1093 req->flags |= REQ_F_NOWAIT;
1094
1095 zc->done_io = 0;
1096
1097 #ifdef CONFIG_COMPAT
1098 if (req->ctx->compat)
1099 zc->msg_flags |= MSG_CMSG_COMPAT;
1100 #endif
1101 return 0;
1102 }
1103
/*
 * ->sg_from_iter callback used for sends that are not backed by a registered
 * buffer: downgrade the skb from managed zerocopy, then hand the iterator to
 * the generic __zerocopy_sg_from_iter() and return its result.
 */
static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	int ret;

	skb_zcopy_downgrade_managed(skb);
	ret = __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
	return ret;
}
1110
/*
 * ->sg_from_iter callback used for sends backed by a registered buffer.
 *
 * Walks the bvec array behind @from and fills skb frags from it until either
 * min(from->count, length) bytes are consumed or MAX_SKB_FRAGS is reached.
 * An skb without frags gets SKBFL_MANAGED_FRAG_REFS set; an skb that already
 * has frags but is not zcopy-managed is handed to __zerocopy_sg_from_iter()
 * instead.
 *
 * Returns 0 when everything fit, -EMSGSIZE when bytes were left over after
 * running out of frag slots, or the fallback helper's return value.
 */
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *si = skb_shinfo(skb);
	unsigned long charged = 0;
	ssize_t bytes = 0;
	struct bvec_iter it;
	int nfrags = si->nr_frags;
	int err;

	if (nfrags) {
		if (unlikely(!skb_zcopy_managed(skb)))
			return __zerocopy_sg_from_iter(NULL, sk, skb, from,
						       length);
	} else {
		si->flags |= SKBFL_MANAGED_FRAG_REFS;
	}

	it.bi_idx = 0;
	it.bi_bvec_done = from->iov_offset;
	it.bi_size = min(from->count, length);

	for (; it.bi_size && nfrags < MAX_SKB_FRAGS; nfrags++) {
		struct bio_vec seg = mp_bvec_iter_bvec(from->bvec, it);

		__skb_fill_page_desc_noacc(si, nfrags, seg.bv_page,
					   seg.bv_offset, seg.bv_len);
		bytes += seg.bv_len;
		charged += PAGE_ALIGN(seg.bv_len + seg.bv_offset);
		bvec_iter_advance_single(from->bvec, &it, seg.bv_len);
	}
	/* Anything still pending did not fit into the available frag slots. */
	err = it.bi_size ? -EMSGSIZE : 0;

	/* Publish the new frag count and advance the caller's iterator. */
	si->nr_frags = nfrags;
	from->bvec += it.bi_idx;
	from->nr_segs -= it.bi_idx;
	from->count -= bytes;
	from->iov_offset = it.bi_bvec_done;

	skb->truesize += charged;
	skb->len += bytes;
	skb->data_len += bytes;

	if (!sk || sk->sk_type != SOCK_STREAM) {
		refcount_add(charged, &skb->sk->sk_wmem_alloc);
		return err;
	}

	sk_wmem_queued_add(sk, charged);
	if (!skb_zcopy_pure(skb))
		sk_mem_charge(sk, charged);
	return err;
}
1161
io_send_zc(struct io_kiocb * req,unsigned int issue_flags)1162 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1163 {
1164 struct sockaddr_storage __address;
1165 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1166 struct msghdr msg;
1167 struct socket *sock;
1168 unsigned msg_flags;
1169 int ret, min_ret = 0;
1170
1171 sock = sock_from_file(req->file);
1172 if (unlikely(!sock))
1173 return -ENOTSOCK;
1174 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1175 return -EOPNOTSUPP;
1176
1177 msg.msg_name = NULL;
1178 msg.msg_control = NULL;
1179 msg.msg_controllen = 0;
1180 msg.msg_namelen = 0;
1181
1182 if (zc->addr) {
1183 if (req_has_async_data(req)) {
1184 struct io_async_msghdr *io = req->async_data;
1185
1186 msg.msg_name = &io->addr;
1187 } else {
1188 ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
1189 if (unlikely(ret < 0))
1190 return ret;
1191 msg.msg_name = (struct sockaddr *)&__address;
1192 }
1193 msg.msg_namelen = zc->addr_len;
1194 }
1195
1196 if (!(req->flags & REQ_F_POLLED) &&
1197 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1198 return io_setup_async_addr(req, &__address, issue_flags);
1199
1200 if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1201 ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
1202 (u64)(uintptr_t)zc->buf, zc->len);
1203 if (unlikely(ret))
1204 return ret;
1205 msg.sg_from_iter = io_sg_from_iter;
1206 } else {
1207 io_notif_set_extended(zc->notif);
1208 ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
1209 if (unlikely(ret))
1210 return ret;
1211 ret = io_notif_account_mem(zc->notif, zc->len);
1212 if (unlikely(ret))
1213 return ret;
1214 msg.sg_from_iter = io_sg_from_iter_iovec;
1215 }
1216
1217 msg_flags = zc->msg_flags | MSG_ZEROCOPY;
1218 if (issue_flags & IO_URING_F_NONBLOCK)
1219 msg_flags |= MSG_DONTWAIT;
1220 if (msg_flags & MSG_WAITALL)
1221 min_ret = iov_iter_count(&msg.msg_iter);
1222 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1223
1224 msg.msg_flags = msg_flags;
1225 msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1226 ret = sock_sendmsg(sock, &msg);
1227
1228 if (unlikely(ret < min_ret)) {
1229 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1230 return io_setup_async_addr(req, &__address, issue_flags);
1231
1232 if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
1233 zc->len -= ret;
1234 zc->buf += ret;
1235 zc->done_io += ret;
1236 req->flags |= REQ_F_PARTIAL_IO;
1237 return io_setup_async_addr(req, &__address, issue_flags);
1238 }
1239 if (ret == -ERESTARTSYS)
1240 ret = -EINTR;
1241 req_set_fail(req);
1242 }
1243
1244 if (ret >= 0)
1245 ret += zc->done_io;
1246 else if (zc->done_io)
1247 ret = zc->done_io;
1248
1249 /*
1250 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1251 * flushing notif to io_send_zc_cleanup()
1252 */
1253 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1254 io_notif_flush(zc->notif);
1255 req->flags &= ~REQ_F_NEED_CLEANUP;
1256 }
1257 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1258 return IOU_OK;
1259 }
1260
io_sendmsg_zc(struct io_kiocb * req,unsigned int issue_flags)1261 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1262 {
1263 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1264 struct io_async_msghdr iomsg, *kmsg;
1265 struct socket *sock;
1266 unsigned flags;
1267 int ret, min_ret = 0;
1268
1269 io_notif_set_extended(sr->notif);
1270
1271 sock = sock_from_file(req->file);
1272 if (unlikely(!sock))
1273 return -ENOTSOCK;
1274 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1275 return -EOPNOTSUPP;
1276
1277 if (req_has_async_data(req)) {
1278 kmsg = req->async_data;
1279 kmsg->msg.msg_control_user = sr->msg_control;
1280 } else {
1281 ret = io_sendmsg_copy_hdr(req, &iomsg);
1282 if (ret)
1283 return ret;
1284 kmsg = &iomsg;
1285 }
1286
1287 if (!(req->flags & REQ_F_POLLED) &&
1288 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1289 return io_setup_async_msg(req, kmsg, issue_flags);
1290
1291 flags = sr->msg_flags | MSG_ZEROCOPY;
1292 if (issue_flags & IO_URING_F_NONBLOCK)
1293 flags |= MSG_DONTWAIT;
1294 if (flags & MSG_WAITALL)
1295 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1296
1297 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1298 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1299 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1300
1301 if (unlikely(ret < min_ret)) {
1302 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1303 return io_setup_async_msg(req, kmsg, issue_flags);
1304
1305 if (ret > 0 && io_net_retry(sock, flags)) {
1306 sr->done_io += ret;
1307 req->flags |= REQ_F_PARTIAL_IO;
1308 return io_setup_async_msg(req, kmsg, issue_flags);
1309 }
1310 if (ret == -ERESTARTSYS)
1311 ret = -EINTR;
1312 req_set_fail(req);
1313 }
1314 /* fast path, check for non-NULL to avoid function call */
1315 if (kmsg->free_iov) {
1316 kfree(kmsg->free_iov);
1317 kmsg->free_iov = NULL;
1318 }
1319
1320 io_netmsg_recycle(req, issue_flags);
1321 if (ret >= 0)
1322 ret += sr->done_io;
1323 else if (sr->done_io)
1324 ret = sr->done_io;
1325
1326 /*
1327 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1328 * flushing notif to io_send_zc_cleanup()
1329 */
1330 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1331 io_notif_flush(sr->notif);
1332 req->flags &= ~REQ_F_NEED_CLEANUP;
1333 }
1334 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1335 return IOU_OK;
1336 }
1337
io_sendrecv_fail(struct io_kiocb * req)1338 void io_sendrecv_fail(struct io_kiocb *req)
1339 {
1340 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1341
1342 if (req->flags & REQ_F_PARTIAL_IO)
1343 req->cqe.res = sr->done_io;
1344
1345 if ((req->flags & REQ_F_NEED_CLEANUP) &&
1346 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1347 req->cqe.flags |= IORING_CQE_F_MORE;
1348 }
1349
io_accept_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1350 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1351 {
1352 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1353 unsigned flags;
1354
1355 if (sqe->len || sqe->buf_index)
1356 return -EINVAL;
1357
1358 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1359 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1360 accept->flags = READ_ONCE(sqe->accept_flags);
1361 accept->nofile = rlimit(RLIMIT_NOFILE);
1362 flags = READ_ONCE(sqe->ioprio);
1363 if (flags & ~IORING_ACCEPT_MULTISHOT)
1364 return -EINVAL;
1365
1366 accept->file_slot = READ_ONCE(sqe->file_index);
1367 if (accept->file_slot) {
1368 if (accept->flags & SOCK_CLOEXEC)
1369 return -EINVAL;
1370 if (flags & IORING_ACCEPT_MULTISHOT &&
1371 accept->file_slot != IORING_FILE_INDEX_ALLOC)
1372 return -EINVAL;
1373 }
1374 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1375 return -EINVAL;
1376 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1377 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1378 if (flags & IORING_ACCEPT_MULTISHOT)
1379 req->flags |= REQ_F_APOLL_MULTISHOT;
1380 return 0;
1381 }
1382
io_accept(struct io_kiocb * req,unsigned int issue_flags)1383 int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1384 {
1385 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1386 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1387 unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
1388 bool fixed = !!accept->file_slot;
1389 struct file *file;
1390 int ret, fd;
1391
1392 if (!io_check_multishot(req, issue_flags))
1393 return -EAGAIN;
1394 retry:
1395 if (!fixed) {
1396 fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1397 if (unlikely(fd < 0))
1398 return fd;
1399 }
1400 file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
1401 accept->flags);
1402 if (IS_ERR(file)) {
1403 if (!fixed)
1404 put_unused_fd(fd);
1405 ret = PTR_ERR(file);
1406 if (ret == -EAGAIN && force_nonblock) {
1407 /*
1408 * if it's multishot and polled, we don't need to
1409 * return EAGAIN to arm the poll infra since it
1410 * has already been done
1411 */
1412 if (issue_flags & IO_URING_F_MULTISHOT)
1413 return IOU_ISSUE_SKIP_COMPLETE;
1414 return ret;
1415 }
1416 if (ret == -ERESTARTSYS)
1417 ret = -EINTR;
1418 req_set_fail(req);
1419 } else if (!fixed) {
1420 fd_install(fd, file);
1421 ret = fd;
1422 } else {
1423 ret = io_fixed_fd_install(req, issue_flags, file,
1424 accept->file_slot);
1425 }
1426
1427 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1428 io_req_set_res(req, ret, 0);
1429 return IOU_OK;
1430 }
1431
1432 if (ret < 0)
1433 return ret;
1434 if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
1435 ret, IORING_CQE_F_MORE))
1436 goto retry;
1437
1438 io_req_set_res(req, ret, 0);
1439 return IOU_STOP_MULTISHOT;
1440 }
1441
io_socket_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1442 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1443 {
1444 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1445
1446 if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1447 return -EINVAL;
1448
1449 sock->domain = READ_ONCE(sqe->fd);
1450 sock->type = READ_ONCE(sqe->off);
1451 sock->protocol = READ_ONCE(sqe->len);
1452 sock->file_slot = READ_ONCE(sqe->file_index);
1453 sock->nofile = rlimit(RLIMIT_NOFILE);
1454
1455 sock->flags = sock->type & ~SOCK_TYPE_MASK;
1456 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1457 return -EINVAL;
1458 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1459 return -EINVAL;
1460 return 0;
1461 }
1462
io_socket(struct io_kiocb * req,unsigned int issue_flags)1463 int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1464 {
1465 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1466 bool fixed = !!sock->file_slot;
1467 struct file *file;
1468 int ret, fd;
1469
1470 if (!fixed) {
1471 fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1472 if (unlikely(fd < 0))
1473 return fd;
1474 }
1475 file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1476 if (IS_ERR(file)) {
1477 if (!fixed)
1478 put_unused_fd(fd);
1479 ret = PTR_ERR(file);
1480 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1481 return -EAGAIN;
1482 if (ret == -ERESTARTSYS)
1483 ret = -EINTR;
1484 req_set_fail(req);
1485 } else if (!fixed) {
1486 fd_install(fd, file);
1487 ret = fd;
1488 } else {
1489 ret = io_fixed_fd_install(req, issue_flags, file,
1490 sock->file_slot);
1491 }
1492 io_req_set_res(req, ret, 0);
1493 return IOU_OK;
1494 }
1495
io_connect_prep_async(struct io_kiocb * req)1496 int io_connect_prep_async(struct io_kiocb *req)
1497 {
1498 struct io_async_connect *io = req->async_data;
1499 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1500
1501 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
1502 }
1503
io_connect_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1504 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1505 {
1506 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1507
1508 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1509 return -EINVAL;
1510
1511 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1512 conn->addr_len = READ_ONCE(sqe->addr2);
1513 conn->in_progress = conn->seen_econnaborted = false;
1514 return 0;
1515 }
1516
io_connect(struct io_kiocb * req,unsigned int issue_flags)1517 int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1518 {
1519 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1520 struct io_async_connect __io, *io;
1521 unsigned file_flags;
1522 int ret;
1523 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1524
1525 if (req_has_async_data(req)) {
1526 io = req->async_data;
1527 } else {
1528 ret = move_addr_to_kernel(connect->addr,
1529 connect->addr_len,
1530 &__io.address);
1531 if (ret)
1532 goto out;
1533 io = &__io;
1534 }
1535
1536 file_flags = force_nonblock ? O_NONBLOCK : 0;
1537
1538 ret = __sys_connect_file(req->file, &io->address,
1539 connect->addr_len, file_flags);
1540 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1541 && force_nonblock) {
1542 if (ret == -EINPROGRESS) {
1543 connect->in_progress = true;
1544 } else if (ret == -ECONNABORTED) {
1545 if (connect->seen_econnaborted)
1546 goto out;
1547 connect->seen_econnaborted = true;
1548 }
1549 if (req_has_async_data(req))
1550 return -EAGAIN;
1551 if (io_alloc_async_data(req)) {
1552 ret = -ENOMEM;
1553 goto out;
1554 }
1555 memcpy(req->async_data, &__io, sizeof(__io));
1556 return -EAGAIN;
1557 }
1558 if (connect->in_progress) {
1559 /*
1560 * At least bluetooth will return -EBADFD on a re-connect
1561 * attempt, and it's (supposedly) also valid to get -EISCONN
1562 * which means the previous result is good. For both of these,
1563 * grab the sock_error() and use that for the completion.
1564 */
1565 if (ret == -EBADFD || ret == -EISCONN)
1566 ret = sock_error(sock_from_file(req->file)->sk);
1567 }
1568 if (ret == -ERESTARTSYS)
1569 ret = -EINTR;
1570 out:
1571 if (ret < 0)
1572 req_set_fail(req);
1573 io_req_set_res(req, ret, 0);
1574 return IOU_OK;
1575 }
1576
io_netmsg_cache_free(struct io_cache_entry * entry)1577 void io_netmsg_cache_free(struct io_cache_entry *entry)
1578 {
1579 kfree(container_of(entry, struct io_async_msghdr, cache));
1580 }
1581 #endif
1582