/*
 * Copyright (c) 2006, 2020 Oracle and/or its affiliates.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
337875e18eSAndy Grover #include <linux/kernel.h>
345a0e3ad6STejun Heo #include <linux/slab.h>
35bc3b2d7fSPaul Gortmaker #include <linux/export.h>
3601883edaSSowmini Varadhan #include <linux/skbuff.h>
3701883edaSSowmini Varadhan #include <linux/list.h>
3801883edaSSowmini Varadhan #include <linux/errqueue.h>
397875e18eSAndy Grover
407875e18eSAndy Grover #include "rds.h"
417875e18eSAndy Grover
427875e18eSAndy Grover static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
437875e18eSAndy Grover [RDS_EXTHDR_NONE] = 0,
447875e18eSAndy Grover [RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
457875e18eSAndy Grover [RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
467875e18eSAndy Grover [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
475916e2c1SSowmini Varadhan [RDS_EXTHDR_NPATHS] = sizeof(u16),
48905dd418SSowmini Varadhan [RDS_EXTHDR_GEN_NUM] = sizeof(u32),
497875e18eSAndy Grover };
507875e18eSAndy Grover
rds_message_addref(struct rds_message * rm)517875e18eSAndy Grover void rds_message_addref(struct rds_message *rm)
527875e18eSAndy Grover {
536c5a1c4aSReshetova, Elena rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
546c5a1c4aSReshetova, Elena refcount_inc(&rm->m_refcount);
557875e18eSAndy Grover }
56616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_addref);
577875e18eSAndy Grover
rds_zcookie_add(struct rds_msg_zcopy_info * info,u32 cookie)589426bbc6SSowmini Varadhan static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
5901883edaSSowmini Varadhan {
609426bbc6SSowmini Varadhan struct rds_zcopy_cookies *ck = &info->zcookies;
61401910dbSSowmini Varadhan int ncookies = ck->num;
6201883edaSSowmini Varadhan
63401910dbSSowmini Varadhan if (ncookies == RDS_MAX_ZCOOKIES)
6401883edaSSowmini Varadhan return false;
65401910dbSSowmini Varadhan ck->cookies[ncookies] = cookie;
66401910dbSSowmini Varadhan ck->num = ++ncookies;
6701883edaSSowmini Varadhan return true;
6801883edaSSowmini Varadhan }
6901883edaSSowmini Varadhan
rds_info_from_znotifier(struct rds_znotifier * znotif)70571e6776Skbuild test robot static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
719426bbc6SSowmini Varadhan {
729426bbc6SSowmini Varadhan return container_of(znotif, struct rds_msg_zcopy_info, znotif);
739426bbc6SSowmini Varadhan }
749426bbc6SSowmini Varadhan
rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue * q)759426bbc6SSowmini Varadhan void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
769426bbc6SSowmini Varadhan {
779426bbc6SSowmini Varadhan unsigned long flags;
789426bbc6SSowmini Varadhan LIST_HEAD(copy);
799426bbc6SSowmini Varadhan struct rds_msg_zcopy_info *info, *tmp;
809426bbc6SSowmini Varadhan
819426bbc6SSowmini Varadhan spin_lock_irqsave(&q->lock, flags);
829426bbc6SSowmini Varadhan list_splice(&q->zcookie_head, ©);
839426bbc6SSowmini Varadhan INIT_LIST_HEAD(&q->zcookie_head);
849426bbc6SSowmini Varadhan spin_unlock_irqrestore(&q->lock, flags);
859426bbc6SSowmini Varadhan
869426bbc6SSowmini Varadhan list_for_each_entry_safe(info, tmp, ©, rs_zcookie_next) {
879426bbc6SSowmini Varadhan list_del(&info->rs_zcookie_next);
889426bbc6SSowmini Varadhan kfree(info);
899426bbc6SSowmini Varadhan }
909426bbc6SSowmini Varadhan }
919426bbc6SSowmini Varadhan
rds_rm_zerocopy_callback(struct rds_sock * rs,struct rds_znotifier * znotif)9201883edaSSowmini Varadhan static void rds_rm_zerocopy_callback(struct rds_sock *rs,
9301883edaSSowmini Varadhan struct rds_znotifier *znotif)
9401883edaSSowmini Varadhan {
959426bbc6SSowmini Varadhan struct rds_msg_zcopy_info *info;
969426bbc6SSowmini Varadhan struct rds_msg_zcopy_queue *q;
9701883edaSSowmini Varadhan u32 cookie = znotif->z_cookie;
98401910dbSSowmini Varadhan struct rds_zcopy_cookies *ck;
999426bbc6SSowmini Varadhan struct list_head *head;
1009426bbc6SSowmini Varadhan unsigned long flags;
10101883edaSSowmini Varadhan
1029426bbc6SSowmini Varadhan mm_unaccount_pinned_pages(&znotif->z_mmp);
103401910dbSSowmini Varadhan q = &rs->rs_zcookie_queue;
10401883edaSSowmini Varadhan spin_lock_irqsave(&q->lock, flags);
1059426bbc6SSowmini Varadhan head = &q->zcookie_head;
1069426bbc6SSowmini Varadhan if (!list_empty(head)) {
107f753a689SPietro Borrello info = list_first_entry(head, struct rds_msg_zcopy_info,
1089426bbc6SSowmini Varadhan rs_zcookie_next);
109f753a689SPietro Borrello if (rds_zcookie_add(info, cookie)) {
11001883edaSSowmini Varadhan spin_unlock_irqrestore(&q->lock, flags);
1119426bbc6SSowmini Varadhan kfree(rds_info_from_znotifier(znotif));
112401910dbSSowmini Varadhan /* caller invokes rds_wake_sk_sleep() */
11301883edaSSowmini Varadhan return;
11401883edaSSowmini Varadhan }
1159426bbc6SSowmini Varadhan }
11601883edaSSowmini Varadhan
1179426bbc6SSowmini Varadhan info = rds_info_from_znotifier(znotif);
1189426bbc6SSowmini Varadhan ck = &info->zcookies;
119401910dbSSowmini Varadhan memset(ck, 0, sizeof(*ck));
1209426bbc6SSowmini Varadhan WARN_ON(!rds_zcookie_add(info, cookie));
121*68762148SPietro Borrello list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
12201883edaSSowmini Varadhan
12301883edaSSowmini Varadhan spin_unlock_irqrestore(&q->lock, flags);
124401910dbSSowmini Varadhan /* caller invokes rds_wake_sk_sleep() */
12501883edaSSowmini Varadhan }
12601883edaSSowmini Varadhan
1277875e18eSAndy Grover /*
1287875e18eSAndy Grover * This relies on dma_map_sg() not touching sg[].page during merging.
1297875e18eSAndy Grover */
rds_message_purge(struct rds_message * rm)1307875e18eSAndy Grover static void rds_message_purge(struct rds_message *rm)
1317875e18eSAndy Grover {
132ea8994cbSSowmini Varadhan unsigned long i, flags;
13301883edaSSowmini Varadhan bool zcopy = false;
1347875e18eSAndy Grover
1357875e18eSAndy Grover if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
1367875e18eSAndy Grover return;
1377875e18eSAndy Grover
138ea8994cbSSowmini Varadhan spin_lock_irqsave(&rm->m_rs_lock, flags);
139ea8994cbSSowmini Varadhan if (rm->m_rs) {
14001883edaSSowmini Varadhan struct rds_sock *rs = rm->m_rs;
14101883edaSSowmini Varadhan
14201883edaSSowmini Varadhan if (rm->data.op_mmp_znotifier) {
14301883edaSSowmini Varadhan zcopy = true;
14401883edaSSowmini Varadhan rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
145401910dbSSowmini Varadhan rds_wake_sk_sleep(rs);
14601883edaSSowmini Varadhan rm->data.op_mmp_znotifier = NULL;
14701883edaSSowmini Varadhan }
14801883edaSSowmini Varadhan sock_put(rds_rs_to_sk(rs));
149ea8994cbSSowmini Varadhan rm->m_rs = NULL;
150ea8994cbSSowmini Varadhan }
151ea8994cbSSowmini Varadhan spin_unlock_irqrestore(&rm->m_rs_lock, flags);
1527875e18eSAndy Grover
15301883edaSSowmini Varadhan for (i = 0; i < rm->data.op_nents; i++) {
15401883edaSSowmini Varadhan /* XXX will have to put_page for page refs */
15501883edaSSowmini Varadhan if (!zcopy)
15601883edaSSowmini Varadhan __free_page(sg_page(&rm->data.op_sg[i]));
15701883edaSSowmini Varadhan else
15801883edaSSowmini Varadhan put_page(sg_page(&rm->data.op_sg[i]));
15901883edaSSowmini Varadhan }
16001883edaSSowmini Varadhan rm->data.op_nents = 0;
16101883edaSSowmini Varadhan
162f8b3aaf2SAndy Grover if (rm->rdma.op_active)
163f8b3aaf2SAndy Grover rds_rdma_free_op(&rm->rdma);
164f8b3aaf2SAndy Grover if (rm->rdma.op_rdma_mr)
165e228a5d0SKa-Cheong Poon kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final);
166d0ab25a8SAndy Grover
167d0ab25a8SAndy Grover if (rm->atomic.op_active)
168d0ab25a8SAndy Grover rds_atomic_free_op(&rm->atomic);
169d0ab25a8SAndy Grover if (rm->atomic.op_rdma_mr)
170e228a5d0SKa-Cheong Poon kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final);
1717875e18eSAndy Grover }
1727875e18eSAndy Grover
rds_message_put(struct rds_message * rm)1737875e18eSAndy Grover void rds_message_put(struct rds_message *rm)
1747875e18eSAndy Grover {
1756c5a1c4aSReshetova, Elena rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
1766c5a1c4aSReshetova, Elena WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
1776c5a1c4aSReshetova, Elena if (refcount_dec_and_test(&rm->m_refcount)) {
1787875e18eSAndy Grover BUG_ON(!list_empty(&rm->m_sock_item));
1797875e18eSAndy Grover BUG_ON(!list_empty(&rm->m_conn_item));
1807875e18eSAndy Grover rds_message_purge(rm);
1817875e18eSAndy Grover
1827875e18eSAndy Grover kfree(rm);
1837875e18eSAndy Grover }
1847875e18eSAndy Grover }
185616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_put);
1867875e18eSAndy Grover
/*
 * Fill in the basic fields of @hdr: flags cleared, source and destination
 * ports stored as given, @seq stored big-endian, and the extension area
 * marked empty.
 */
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
				 __be16 dport, u64 seq)
{
	hdr->h_sport = sport;
	hdr->h_dport = dport;
	hdr->h_sequence = cpu_to_be64(seq);
	hdr->h_flags = 0;
	hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
}
EXPORT_SYMBOL_GPL(rds_message_populate_header);
1977875e18eSAndy Grover
rds_message_add_extension(struct rds_header * hdr,unsigned int type,const void * data,unsigned int len)198ff51bf84Sstephen hemminger int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
199ff51bf84Sstephen hemminger const void *data, unsigned int len)
2007875e18eSAndy Grover {
2017875e18eSAndy Grover unsigned int ext_len = sizeof(u8) + len;
2027875e18eSAndy Grover unsigned char *dst;
2037875e18eSAndy Grover
2047875e18eSAndy Grover /* For now, refuse to add more than one extension header */
2057875e18eSAndy Grover if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
2067875e18eSAndy Grover return 0;
2077875e18eSAndy Grover
208f64f9e71SJoe Perches if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type])
2097875e18eSAndy Grover return 0;
2107875e18eSAndy Grover
2117875e18eSAndy Grover if (ext_len >= RDS_HEADER_EXT_SPACE)
2127875e18eSAndy Grover return 0;
2137875e18eSAndy Grover dst = hdr->h_exthdr;
2147875e18eSAndy Grover
2157875e18eSAndy Grover *dst++ = type;
2167875e18eSAndy Grover memcpy(dst, data, len);
2177875e18eSAndy Grover
2187875e18eSAndy Grover dst[len] = RDS_EXTHDR_NONE;
2197875e18eSAndy Grover return 1;
2207875e18eSAndy Grover }
221616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_add_extension);
2227875e18eSAndy Grover
2237875e18eSAndy Grover /*
2247875e18eSAndy Grover * If a message has extension headers, retrieve them here.
2257875e18eSAndy Grover * Call like this:
2267875e18eSAndy Grover *
2277875e18eSAndy Grover * unsigned int pos = 0;
2287875e18eSAndy Grover *
2297875e18eSAndy Grover * while (1) {
2307875e18eSAndy Grover * buflen = sizeof(buffer);
2317875e18eSAndy Grover * type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
2327875e18eSAndy Grover * if (type == RDS_EXTHDR_NONE)
2337875e18eSAndy Grover * break;
2347875e18eSAndy Grover * ...
2357875e18eSAndy Grover * }
2367875e18eSAndy Grover */
rds_message_next_extension(struct rds_header * hdr,unsigned int * pos,void * buf,unsigned int * buflen)2377875e18eSAndy Grover int rds_message_next_extension(struct rds_header *hdr,
2387875e18eSAndy Grover unsigned int *pos, void *buf, unsigned int *buflen)
2397875e18eSAndy Grover {
2407875e18eSAndy Grover unsigned int offset, ext_type, ext_len;
2417875e18eSAndy Grover u8 *src = hdr->h_exthdr;
2427875e18eSAndy Grover
2437875e18eSAndy Grover offset = *pos;
2447875e18eSAndy Grover if (offset >= RDS_HEADER_EXT_SPACE)
2457875e18eSAndy Grover goto none;
2467875e18eSAndy Grover
2477875e18eSAndy Grover /* Get the extension type and length. For now, the
2487875e18eSAndy Grover * length is implied by the extension type. */
2497875e18eSAndy Grover ext_type = src[offset++];
2507875e18eSAndy Grover
2517875e18eSAndy Grover if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX)
2527875e18eSAndy Grover goto none;
2537875e18eSAndy Grover ext_len = rds_exthdr_size[ext_type];
2547875e18eSAndy Grover if (offset + ext_len > RDS_HEADER_EXT_SPACE)
2557875e18eSAndy Grover goto none;
2567875e18eSAndy Grover
2577875e18eSAndy Grover *pos = offset + ext_len;
2587875e18eSAndy Grover if (ext_len < *buflen)
2597875e18eSAndy Grover *buflen = ext_len;
2607875e18eSAndy Grover memcpy(buf, src + offset, *buflen);
2617875e18eSAndy Grover return ext_type;
2627875e18eSAndy Grover
2637875e18eSAndy Grover none:
2647875e18eSAndy Grover *pos = RDS_HEADER_EXT_SPACE;
2657875e18eSAndy Grover *buflen = 0;
2667875e18eSAndy Grover return RDS_EXTHDR_NONE;
2677875e18eSAndy Grover }
2687875e18eSAndy Grover
/*
 * Add an RDS_EXTHDR_RDMA_DEST extension to @hdr carrying @r_key and
 * @offset, both converted to big-endian.
 *
 * Returns whatever rds_message_add_extension() returns (1 on success,
 * 0 on failure).
 */
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
{
	struct rds_ext_header_rdma_dest dest;

	dest.h_rdma_rkey = cpu_to_be32(r_key);
	dest.h_rdma_offset = cpu_to_be32(offset);

	return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST,
					 &dest, sizeof(dest));
}
EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
2787875e18eSAndy Grover
279fc445084SAndy Grover /*
280fc445084SAndy Grover * Each rds_message is allocated with extra space for the scatterlist entries
281fc445084SAndy Grover * rds ops will need. This is to minimize memory allocation count. Then, each rds op
282fc445084SAndy Grover * can grab SGs when initializing its part of the rds_message.
283fc445084SAndy Grover */
rds_message_alloc(unsigned int extra_len,gfp_t gfp)284fc445084SAndy Grover struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
2857875e18eSAndy Grover {
2867875e18eSAndy Grover struct rds_message *rm;
2877875e18eSAndy Grover
288ece6b0a2SCong Wang if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message))
289ece6b0a2SCong Wang return NULL;
290ece6b0a2SCong Wang
291fc445084SAndy Grover rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
2927875e18eSAndy Grover if (!rm)
2937875e18eSAndy Grover goto out;
2947875e18eSAndy Grover
295fc445084SAndy Grover rm->m_used_sgs = 0;
296fc445084SAndy Grover rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
297fc445084SAndy Grover
2986c5a1c4aSReshetova, Elena refcount_set(&rm->m_refcount, 1);
2997875e18eSAndy Grover INIT_LIST_HEAD(&rm->m_sock_item);
3007875e18eSAndy Grover INIT_LIST_HEAD(&rm->m_conn_item);
3017875e18eSAndy Grover spin_lock_init(&rm->m_rs_lock);
302c83188dcSChris Mason init_waitqueue_head(&rm->m_flush_wait);
3037875e18eSAndy Grover
3047875e18eSAndy Grover out:
3057875e18eSAndy Grover return rm;
3067875e18eSAndy Grover }
3077875e18eSAndy Grover
308fc445084SAndy Grover /*
309fc445084SAndy Grover * RDS ops use this to grab SG entries from the rm's sg pool.
310fc445084SAndy Grover */
rds_message_alloc_sgs(struct rds_message * rm,int nents)3117dba9203SJason Gunthorpe struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
312fc445084SAndy Grover {
313fc445084SAndy Grover struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
314fc445084SAndy Grover struct scatterlist *sg_ret;
315fc445084SAndy Grover
316c75ab8a5Sshamir rabinovitch if (nents <= 0) {
317c75ab8a5Sshamir rabinovitch pr_warn("rds: alloc sgs failed! nents <= 0\n");
3187dba9203SJason Gunthorpe return ERR_PTR(-EINVAL);
319c75ab8a5Sshamir rabinovitch }
320c75ab8a5Sshamir rabinovitch
321c75ab8a5Sshamir rabinovitch if (rm->m_used_sgs + nents > rm->m_total_sgs) {
322c75ab8a5Sshamir rabinovitch pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n",
323c75ab8a5Sshamir rabinovitch rm->m_total_sgs, rm->m_used_sgs, nents);
3247dba9203SJason Gunthorpe return ERR_PTR(-ENOMEM);
325c75ab8a5Sshamir rabinovitch }
326c75ab8a5Sshamir rabinovitch
327fc445084SAndy Grover sg_ret = &sg_first[rm->m_used_sgs];
328f4dd96f7SAndy Grover sg_init_table(sg_ret, nents);
329fc445084SAndy Grover rm->m_used_sgs += nents;
330fc445084SAndy Grover
331fc445084SAndy Grover return sg_ret;
332fc445084SAndy Grover }
333fc445084SAndy Grover
rds_message_map_pages(unsigned long * page_addrs,unsigned int total_len)3347875e18eSAndy Grover struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
3357875e18eSAndy Grover {
3367875e18eSAndy Grover struct rds_message *rm;
3377875e18eSAndy Grover unsigned int i;
338eeb2c4fbSJacob Wen int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
339ff87e97aSAndy Grover int extra_bytes = num_sgs * sizeof(struct scatterlist);
3407875e18eSAndy Grover
341f2ec76f2SAndy Grover rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
3428690bfa1SAndy Grover if (!rm)
3437875e18eSAndy Grover return ERR_PTR(-ENOMEM);
3447875e18eSAndy Grover
3457875e18eSAndy Grover set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
3467875e18eSAndy Grover rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
347eeb2c4fbSJacob Wen rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE);
3487dba9203SJason Gunthorpe rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
3497dba9203SJason Gunthorpe if (IS_ERR(rm->data.op_sg)) {
350bdc2ab5cSLv Yunlong void *err = ERR_CAST(rm->data.op_sg);
351aa58163aSPavel Emelyanov rds_message_put(rm);
352bdc2ab5cSLv Yunlong return err;
353aa58163aSPavel Emelyanov }
3547875e18eSAndy Grover
3556c7cc6e4SAndy Grover for (i = 0; i < rm->data.op_nents; ++i) {
3566c7cc6e4SAndy Grover sg_set_page(&rm->data.op_sg[i],
357a60511cfSLinus Walleij virt_to_page((void *)page_addrs[i]),
3587875e18eSAndy Grover PAGE_SIZE, 0);
3597875e18eSAndy Grover }
3607875e18eSAndy Grover
3617875e18eSAndy Grover return rm;
3627875e18eSAndy Grover }
3637875e18eSAndy Grover
rds_message_zcopy_from_user(struct rds_message * rm,struct iov_iter * from)364496c7f3cSkbuild test robot static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
3657875e18eSAndy Grover {
3667875e18eSAndy Grover struct scatterlist *sg;
367fc445084SAndy Grover int ret = 0;
3680cebacceSSowmini Varadhan int length = iov_iter_count(from);
3699426bbc6SSowmini Varadhan struct rds_msg_zcopy_info *info;
3707875e18eSAndy Grover
371083735f4SAl Viro rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
3727875e18eSAndy Grover
3737875e18eSAndy Grover /*
3747875e18eSAndy Grover * now allocate and copy in the data payload.
3757875e18eSAndy Grover */
3766c7cc6e4SAndy Grover sg = rm->data.op_sg;
3777875e18eSAndy Grover
3789426bbc6SSowmini Varadhan info = kzalloc(sizeof(*info), GFP_KERNEL);
3799426bbc6SSowmini Varadhan if (!info)
3800cebacceSSowmini Varadhan return -ENOMEM;
3819426bbc6SSowmini Varadhan INIT_LIST_HEAD(&info->rs_zcookie_next);
3829426bbc6SSowmini Varadhan rm->data.op_mmp_znotifier = &info->znotif;
3830cebacceSSowmini Varadhan if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
3840cebacceSSowmini Varadhan length)) {
3850cebacceSSowmini Varadhan ret = -ENOMEM;
3860cebacceSSowmini Varadhan goto err;
3870cebacceSSowmini Varadhan }
3880cebacceSSowmini Varadhan while (iov_iter_count(from)) {
3890cebacceSSowmini Varadhan struct page *pages;
3900cebacceSSowmini Varadhan size_t start;
3910cebacceSSowmini Varadhan ssize_t copied;
3920cebacceSSowmini Varadhan
3931ef255e2SAl Viro copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE,
3940cebacceSSowmini Varadhan 1, &start);
3950cebacceSSowmini Varadhan if (copied < 0) {
3960cebacceSSowmini Varadhan struct mmpin *mmp;
3970cebacceSSowmini Varadhan int i;
3980cebacceSSowmini Varadhan
3990cebacceSSowmini Varadhan for (i = 0; i < rm->data.op_nents; i++)
4000cebacceSSowmini Varadhan put_page(sg_page(&rm->data.op_sg[i]));
4010cebacceSSowmini Varadhan mmp = &rm->data.op_mmp_znotifier->z_mmp;
4020cebacceSSowmini Varadhan mm_unaccount_pinned_pages(mmp);
4030cebacceSSowmini Varadhan ret = -EFAULT;
4040cebacceSSowmini Varadhan goto err;
4050cebacceSSowmini Varadhan }
4060cebacceSSowmini Varadhan length -= copied;
4070cebacceSSowmini Varadhan sg_set_page(sg, pages, copied, start);
4080cebacceSSowmini Varadhan rm->data.op_nents++;
4090cebacceSSowmini Varadhan sg++;
4100cebacceSSowmini Varadhan }
4110cebacceSSowmini Varadhan WARN_ON_ONCE(length != 0);
4120cebacceSSowmini Varadhan return ret;
4130cebacceSSowmini Varadhan err:
4149426bbc6SSowmini Varadhan kfree(info);
4150cebacceSSowmini Varadhan rm->data.op_mmp_znotifier = NULL;
4160cebacceSSowmini Varadhan return ret;
417d40a126bSSowmini Varadhan }
418d40a126bSSowmini Varadhan
/*
 * Load the payload described by @from into @rm and record its length in
 * the message header.
 *
 * With @zcopy set the work is handed to rds_message_zcopy_from_user().
 * Otherwise pages are obtained through rds_page_remainder_alloc() as
 * needed and the data is copied into them.
 *
 * Returns 0 on success, the error from rds_page_remainder_alloc(), or
 * -EFAULT when fewer bytes than requested could be copied.
 */
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
			       bool zcopy)
{
	struct scatterlist *sg = rm->data.op_sg;
	unsigned long chunk, done;
	unsigned long sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
	int ret = 0;

	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));

	if (zcopy)
		return rds_message_zcopy_from_user(rm, from);

	/* now allocate and copy in the data payload. */
	while (iov_iter_count(from)) {
		if (!sg_page(sg)) {
			ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
						       GFP_HIGHUSER);
			if (ret)
				return ret;
			rm->data.op_nents++;
			sg_off = 0;
		}

		chunk = min_t(unsigned long, iov_iter_count(from),
			      sg->length - sg_off);

		rds_stats_add(s_copy_from_user, chunk);
		done = copy_page_from_iter(sg_page(sg), sg->offset + sg_off,
					   chunk, from);
		if (done != chunk)
			return -EFAULT;

		sg_off += chunk;

		/* Entry is full: continue with the next one. */
		if (sg_off == sg->length)
			sg++;
	}

	return ret;
}
4637875e18eSAndy Grover
rds_message_inc_copy_to_user(struct rds_incoming * inc,struct iov_iter * to)464c310e72cSAl Viro int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
4657875e18eSAndy Grover {
4667875e18eSAndy Grover struct rds_message *rm;
4677875e18eSAndy Grover struct scatterlist *sg;
4687875e18eSAndy Grover unsigned long to_copy;
4697875e18eSAndy Grover unsigned long vec_off;
4707875e18eSAndy Grover int copied;
4717875e18eSAndy Grover int ret;
4727875e18eSAndy Grover u32 len;
4737875e18eSAndy Grover
4747875e18eSAndy Grover rm = container_of(inc, struct rds_message, m_inc);
4757875e18eSAndy Grover len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
4767875e18eSAndy Grover
4776c7cc6e4SAndy Grover sg = rm->data.op_sg;
4787875e18eSAndy Grover vec_off = 0;
4797875e18eSAndy Grover copied = 0;
4807875e18eSAndy Grover
481c310e72cSAl Viro while (iov_iter_count(to) && copied < len) {
4826ff4a8adSGeert Uytterhoeven to_copy = min_t(unsigned long, iov_iter_count(to),
4836ff4a8adSGeert Uytterhoeven sg->length - vec_off);
4847875e18eSAndy Grover to_copy = min_t(unsigned long, to_copy, len - copied);
4857875e18eSAndy Grover
486c310e72cSAl Viro rds_stats_add(s_copy_to_user, to_copy);
487c310e72cSAl Viro ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off,
488c310e72cSAl Viro to_copy, to);
489c310e72cSAl Viro if (ret != to_copy)
490c310e72cSAl Viro return -EFAULT;
4917875e18eSAndy Grover
4927875e18eSAndy Grover vec_off += to_copy;
4937875e18eSAndy Grover copied += to_copy;
4947875e18eSAndy Grover
4957875e18eSAndy Grover if (vec_off == sg->length) {
4967875e18eSAndy Grover vec_off = 0;
4977875e18eSAndy Grover sg++;
4987875e18eSAndy Grover }
4997875e18eSAndy Grover }
5007875e18eSAndy Grover
5017875e18eSAndy Grover return copied;
5027875e18eSAndy Grover }
5037875e18eSAndy Grover
5047875e18eSAndy Grover /*
5057875e18eSAndy Grover * If the message is still on the send queue, wait until the transport
5067875e18eSAndy Grover * is done with it. This is particularly important for RDMA operations.
5077875e18eSAndy Grover */
rds_message_wait(struct rds_message * rm)5087875e18eSAndy Grover void rds_message_wait(struct rds_message *rm)
5097875e18eSAndy Grover {
510c83188dcSChris Mason wait_event_interruptible(rm->m_flush_wait,
5117875e18eSAndy Grover !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
5127875e18eSAndy Grover }
5137875e18eSAndy Grover
rds_message_unmapped(struct rds_message * rm)5147875e18eSAndy Grover void rds_message_unmapped(struct rds_message *rm)
5157875e18eSAndy Grover {
5167875e18eSAndy Grover clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
517c83188dcSChris Mason wake_up_interruptible(&rm->m_flush_wait);
5187875e18eSAndy Grover }
519616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_unmapped);
520