xref: /openbmc/linux/net/rds/message.c (revision 68762148)
17875e18eSAndy Grover /*
2e228a5d0SKa-Cheong Poon  * Copyright (c) 2006, 2020 Oracle and/or its affiliates.
37875e18eSAndy Grover  *
47875e18eSAndy Grover  * This software is available to you under a choice of one of two
57875e18eSAndy Grover  * licenses.  You may choose to be licensed under the terms of the GNU
67875e18eSAndy Grover  * General Public License (GPL) Version 2, available from the file
77875e18eSAndy Grover  * COPYING in the main directory of this source tree, or the
87875e18eSAndy Grover  * OpenIB.org BSD license below:
97875e18eSAndy Grover  *
107875e18eSAndy Grover  *     Redistribution and use in source and binary forms, with or
117875e18eSAndy Grover  *     without modification, are permitted provided that the following
127875e18eSAndy Grover  *     conditions are met:
137875e18eSAndy Grover  *
147875e18eSAndy Grover  *      - Redistributions of source code must retain the above
157875e18eSAndy Grover  *        copyright notice, this list of conditions and the following
167875e18eSAndy Grover  *        disclaimer.
177875e18eSAndy Grover  *
187875e18eSAndy Grover  *      - Redistributions in binary form must reproduce the above
197875e18eSAndy Grover  *        copyright notice, this list of conditions and the following
207875e18eSAndy Grover  *        disclaimer in the documentation and/or other materials
217875e18eSAndy Grover  *        provided with the distribution.
227875e18eSAndy Grover  *
237875e18eSAndy Grover  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
247875e18eSAndy Grover  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
257875e18eSAndy Grover  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
267875e18eSAndy Grover  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
277875e18eSAndy Grover  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
287875e18eSAndy Grover  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
297875e18eSAndy Grover  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
307875e18eSAndy Grover  * SOFTWARE.
317875e18eSAndy Grover  *
327875e18eSAndy Grover  */
337875e18eSAndy Grover #include <linux/kernel.h>
345a0e3ad6STejun Heo #include <linux/slab.h>
35bc3b2d7fSPaul Gortmaker #include <linux/export.h>
3601883edaSSowmini Varadhan #include <linux/skbuff.h>
3701883edaSSowmini Varadhan #include <linux/list.h>
3801883edaSSowmini Varadhan #include <linux/errqueue.h>
397875e18eSAndy Grover 
407875e18eSAndy Grover #include "rds.h"
417875e18eSAndy Grover 
427875e18eSAndy Grover static unsigned int	rds_exthdr_size[__RDS_EXTHDR_MAX] = {
437875e18eSAndy Grover [RDS_EXTHDR_NONE]	= 0,
447875e18eSAndy Grover [RDS_EXTHDR_VERSION]	= sizeof(struct rds_ext_header_version),
457875e18eSAndy Grover [RDS_EXTHDR_RDMA]	= sizeof(struct rds_ext_header_rdma),
467875e18eSAndy Grover [RDS_EXTHDR_RDMA_DEST]	= sizeof(struct rds_ext_header_rdma_dest),
475916e2c1SSowmini Varadhan [RDS_EXTHDR_NPATHS]	= sizeof(u16),
48905dd418SSowmini Varadhan [RDS_EXTHDR_GEN_NUM]	= sizeof(u32),
497875e18eSAndy Grover };
507875e18eSAndy Grover 
rds_message_addref(struct rds_message * rm)517875e18eSAndy Grover void rds_message_addref(struct rds_message *rm)
527875e18eSAndy Grover {
536c5a1c4aSReshetova, Elena 	rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
546c5a1c4aSReshetova, Elena 	refcount_inc(&rm->m_refcount);
557875e18eSAndy Grover }
56616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_addref);
577875e18eSAndy Grover 
/*
 * Append @cookie to the cookie array embedded in @info.
 *
 * Returns true if the cookie was stored.  Returns false, leaving @info
 * untouched, when the array already holds RDS_MAX_ZCOOKIES entries.
 */
static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
{
	struct rds_zcopy_cookies *cookies = &info->zcookies;
	int used = cookies->num;

	if (used != RDS_MAX_ZCOOKIES) {
		cookies->cookies[used] = cookie;
		cookies->num = used + 1;
		return true;
	}

	return false;
}
6901883edaSSowmini Varadhan 
rds_info_from_znotifier(struct rds_znotifier * znotif)70571e6776Skbuild test robot static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
719426bbc6SSowmini Varadhan {
729426bbc6SSowmini Varadhan 	return container_of(znotif, struct rds_msg_zcopy_info, znotif);
739426bbc6SSowmini Varadhan }
749426bbc6SSowmini Varadhan 
rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue * q)759426bbc6SSowmini Varadhan void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
769426bbc6SSowmini Varadhan {
779426bbc6SSowmini Varadhan 	unsigned long flags;
789426bbc6SSowmini Varadhan 	LIST_HEAD(copy);
799426bbc6SSowmini Varadhan 	struct rds_msg_zcopy_info *info, *tmp;
809426bbc6SSowmini Varadhan 
819426bbc6SSowmini Varadhan 	spin_lock_irqsave(&q->lock, flags);
829426bbc6SSowmini Varadhan 	list_splice(&q->zcookie_head, &copy);
839426bbc6SSowmini Varadhan 	INIT_LIST_HEAD(&q->zcookie_head);
849426bbc6SSowmini Varadhan 	spin_unlock_irqrestore(&q->lock, flags);
859426bbc6SSowmini Varadhan 
869426bbc6SSowmini Varadhan 	list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
879426bbc6SSowmini Varadhan 		list_del(&info->rs_zcookie_next);
889426bbc6SSowmini Varadhan 		kfree(info);
899426bbc6SSowmini Varadhan 	}
909426bbc6SSowmini Varadhan }
919426bbc6SSowmini Varadhan 
rds_rm_zerocopy_callback(struct rds_sock * rs,struct rds_znotifier * znotif)9201883edaSSowmini Varadhan static void rds_rm_zerocopy_callback(struct rds_sock *rs,
9301883edaSSowmini Varadhan 				     struct rds_znotifier *znotif)
9401883edaSSowmini Varadhan {
959426bbc6SSowmini Varadhan 	struct rds_msg_zcopy_info *info;
969426bbc6SSowmini Varadhan 	struct rds_msg_zcopy_queue *q;
9701883edaSSowmini Varadhan 	u32 cookie = znotif->z_cookie;
98401910dbSSowmini Varadhan 	struct rds_zcopy_cookies *ck;
999426bbc6SSowmini Varadhan 	struct list_head *head;
1009426bbc6SSowmini Varadhan 	unsigned long flags;
10101883edaSSowmini Varadhan 
1029426bbc6SSowmini Varadhan 	mm_unaccount_pinned_pages(&znotif->z_mmp);
103401910dbSSowmini Varadhan 	q = &rs->rs_zcookie_queue;
10401883edaSSowmini Varadhan 	spin_lock_irqsave(&q->lock, flags);
1059426bbc6SSowmini Varadhan 	head = &q->zcookie_head;
1069426bbc6SSowmini Varadhan 	if (!list_empty(head)) {
107f753a689SPietro Borrello 		info = list_first_entry(head, struct rds_msg_zcopy_info,
1089426bbc6SSowmini Varadhan 					rs_zcookie_next);
109f753a689SPietro Borrello 		if (rds_zcookie_add(info, cookie)) {
11001883edaSSowmini Varadhan 			spin_unlock_irqrestore(&q->lock, flags);
1119426bbc6SSowmini Varadhan 			kfree(rds_info_from_znotifier(znotif));
112401910dbSSowmini Varadhan 			/* caller invokes rds_wake_sk_sleep() */
11301883edaSSowmini Varadhan 			return;
11401883edaSSowmini Varadhan 		}
1159426bbc6SSowmini Varadhan 	}
11601883edaSSowmini Varadhan 
1179426bbc6SSowmini Varadhan 	info = rds_info_from_znotifier(znotif);
1189426bbc6SSowmini Varadhan 	ck = &info->zcookies;
119401910dbSSowmini Varadhan 	memset(ck, 0, sizeof(*ck));
1209426bbc6SSowmini Varadhan 	WARN_ON(!rds_zcookie_add(info, cookie));
121*68762148SPietro Borrello 	list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
12201883edaSSowmini Varadhan 
12301883edaSSowmini Varadhan 	spin_unlock_irqrestore(&q->lock, flags);
124401910dbSSowmini Varadhan 	/* caller invokes rds_wake_sk_sleep() */
12501883edaSSowmini Varadhan }
12601883edaSSowmini Varadhan 
1277875e18eSAndy Grover /*
1287875e18eSAndy Grover  * This relies on dma_map_sg() not touching sg[].page during merging.
1297875e18eSAndy Grover  */
rds_message_purge(struct rds_message * rm)1307875e18eSAndy Grover static void rds_message_purge(struct rds_message *rm)
1317875e18eSAndy Grover {
132ea8994cbSSowmini Varadhan 	unsigned long i, flags;
13301883edaSSowmini Varadhan 	bool zcopy = false;
1347875e18eSAndy Grover 
1357875e18eSAndy Grover 	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
1367875e18eSAndy Grover 		return;
1377875e18eSAndy Grover 
138ea8994cbSSowmini Varadhan 	spin_lock_irqsave(&rm->m_rs_lock, flags);
139ea8994cbSSowmini Varadhan 	if (rm->m_rs) {
14001883edaSSowmini Varadhan 		struct rds_sock *rs = rm->m_rs;
14101883edaSSowmini Varadhan 
14201883edaSSowmini Varadhan 		if (rm->data.op_mmp_znotifier) {
14301883edaSSowmini Varadhan 			zcopy = true;
14401883edaSSowmini Varadhan 			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
145401910dbSSowmini Varadhan 			rds_wake_sk_sleep(rs);
14601883edaSSowmini Varadhan 			rm->data.op_mmp_znotifier = NULL;
14701883edaSSowmini Varadhan 		}
14801883edaSSowmini Varadhan 		sock_put(rds_rs_to_sk(rs));
149ea8994cbSSowmini Varadhan 		rm->m_rs = NULL;
150ea8994cbSSowmini Varadhan 	}
151ea8994cbSSowmini Varadhan 	spin_unlock_irqrestore(&rm->m_rs_lock, flags);
1527875e18eSAndy Grover 
15301883edaSSowmini Varadhan 	for (i = 0; i < rm->data.op_nents; i++) {
15401883edaSSowmini Varadhan 		/* XXX will have to put_page for page refs */
15501883edaSSowmini Varadhan 		if (!zcopy)
15601883edaSSowmini Varadhan 			__free_page(sg_page(&rm->data.op_sg[i]));
15701883edaSSowmini Varadhan 		else
15801883edaSSowmini Varadhan 			put_page(sg_page(&rm->data.op_sg[i]));
15901883edaSSowmini Varadhan 	}
16001883edaSSowmini Varadhan 	rm->data.op_nents = 0;
16101883edaSSowmini Varadhan 
162f8b3aaf2SAndy Grover 	if (rm->rdma.op_active)
163f8b3aaf2SAndy Grover 		rds_rdma_free_op(&rm->rdma);
164f8b3aaf2SAndy Grover 	if (rm->rdma.op_rdma_mr)
165e228a5d0SKa-Cheong Poon 		kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final);
166d0ab25a8SAndy Grover 
167d0ab25a8SAndy Grover 	if (rm->atomic.op_active)
168d0ab25a8SAndy Grover 		rds_atomic_free_op(&rm->atomic);
169d0ab25a8SAndy Grover 	if (rm->atomic.op_rdma_mr)
170e228a5d0SKa-Cheong Poon 		kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final);
1717875e18eSAndy Grover }
1727875e18eSAndy Grover 
rds_message_put(struct rds_message * rm)1737875e18eSAndy Grover void rds_message_put(struct rds_message *rm)
1747875e18eSAndy Grover {
1756c5a1c4aSReshetova, Elena 	rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
1766c5a1c4aSReshetova, Elena 	WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
1776c5a1c4aSReshetova, Elena 	if (refcount_dec_and_test(&rm->m_refcount)) {
1787875e18eSAndy Grover 		BUG_ON(!list_empty(&rm->m_sock_item));
1797875e18eSAndy Grover 		BUG_ON(!list_empty(&rm->m_conn_item));
1807875e18eSAndy Grover 		rds_message_purge(rm);
1817875e18eSAndy Grover 
1827875e18eSAndy Grover 		kfree(rm);
1837875e18eSAndy Grover 	}
1847875e18eSAndy Grover }
185616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_put);
1867875e18eSAndy Grover 
/*
 * Fill in the basic fields of @hdr: ports as given, the sequence number
 * converted to big-endian, flags cleared and the extension area marked
 * empty.
 */
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
				 __be16 dport, u64 seq)
{
	hdr->h_sport = sport;
	hdr->h_dport = dport;
	hdr->h_sequence = cpu_to_be64(seq);

	hdr->h_flags = 0;
	hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
}
EXPORT_SYMBOL_GPL(rds_message_populate_header);
1977875e18eSAndy Grover 
rds_message_add_extension(struct rds_header * hdr,unsigned int type,const void * data,unsigned int len)198ff51bf84Sstephen hemminger int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
199ff51bf84Sstephen hemminger 			      const void *data, unsigned int len)
2007875e18eSAndy Grover {
2017875e18eSAndy Grover 	unsigned int ext_len = sizeof(u8) + len;
2027875e18eSAndy Grover 	unsigned char *dst;
2037875e18eSAndy Grover 
2047875e18eSAndy Grover 	/* For now, refuse to add more than one extension header */
2057875e18eSAndy Grover 	if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
2067875e18eSAndy Grover 		return 0;
2077875e18eSAndy Grover 
208f64f9e71SJoe Perches 	if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type])
2097875e18eSAndy Grover 		return 0;
2107875e18eSAndy Grover 
2117875e18eSAndy Grover 	if (ext_len >= RDS_HEADER_EXT_SPACE)
2127875e18eSAndy Grover 		return 0;
2137875e18eSAndy Grover 	dst = hdr->h_exthdr;
2147875e18eSAndy Grover 
2157875e18eSAndy Grover 	*dst++ = type;
2167875e18eSAndy Grover 	memcpy(dst, data, len);
2177875e18eSAndy Grover 
2187875e18eSAndy Grover 	dst[len] = RDS_EXTHDR_NONE;
2197875e18eSAndy Grover 	return 1;
2207875e18eSAndy Grover }
221616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_add_extension);
2227875e18eSAndy Grover 
2237875e18eSAndy Grover /*
2247875e18eSAndy Grover  * If a message has extension headers, retrieve them here.
2257875e18eSAndy Grover  * Call like this:
2267875e18eSAndy Grover  *
2277875e18eSAndy Grover  * unsigned int pos = 0;
2287875e18eSAndy Grover  *
2297875e18eSAndy Grover  * while (1) {
2307875e18eSAndy Grover  *	buflen = sizeof(buffer);
2317875e18eSAndy Grover  *	type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
2327875e18eSAndy Grover  *	if (type == RDS_EXTHDR_NONE)
2337875e18eSAndy Grover  *		break;
2347875e18eSAndy Grover  *	...
2357875e18eSAndy Grover  * }
2367875e18eSAndy Grover  */
rds_message_next_extension(struct rds_header * hdr,unsigned int * pos,void * buf,unsigned int * buflen)2377875e18eSAndy Grover int rds_message_next_extension(struct rds_header *hdr,
2387875e18eSAndy Grover 		unsigned int *pos, void *buf, unsigned int *buflen)
2397875e18eSAndy Grover {
2407875e18eSAndy Grover 	unsigned int offset, ext_type, ext_len;
2417875e18eSAndy Grover 	u8 *src = hdr->h_exthdr;
2427875e18eSAndy Grover 
2437875e18eSAndy Grover 	offset = *pos;
2447875e18eSAndy Grover 	if (offset >= RDS_HEADER_EXT_SPACE)
2457875e18eSAndy Grover 		goto none;
2467875e18eSAndy Grover 
2477875e18eSAndy Grover 	/* Get the extension type and length. For now, the
2487875e18eSAndy Grover 	 * length is implied by the extension type. */
2497875e18eSAndy Grover 	ext_type = src[offset++];
2507875e18eSAndy Grover 
2517875e18eSAndy Grover 	if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX)
2527875e18eSAndy Grover 		goto none;
2537875e18eSAndy Grover 	ext_len = rds_exthdr_size[ext_type];
2547875e18eSAndy Grover 	if (offset + ext_len > RDS_HEADER_EXT_SPACE)
2557875e18eSAndy Grover 		goto none;
2567875e18eSAndy Grover 
2577875e18eSAndy Grover 	*pos = offset + ext_len;
2587875e18eSAndy Grover 	if (ext_len < *buflen)
2597875e18eSAndy Grover 		*buflen = ext_len;
2607875e18eSAndy Grover 	memcpy(buf, src + offset, *buflen);
2617875e18eSAndy Grover 	return ext_type;
2627875e18eSAndy Grover 
2637875e18eSAndy Grover none:
2647875e18eSAndy Grover 	*pos = RDS_HEADER_EXT_SPACE;
2657875e18eSAndy Grover 	*buflen = 0;
2667875e18eSAndy Grover 	return RDS_EXTHDR_NONE;
2677875e18eSAndy Grover }
2687875e18eSAndy Grover 
/*
 * Add an RDS_EXTHDR_RDMA_DEST extension carrying @r_key and @offset (both
 * stored big-endian) to @hdr.  Returns whatever rds_message_add_extension()
 * returns: 1 on success, 0 on failure.
 */
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
{
	struct rds_ext_header_rdma_dest dest;

	dest.h_rdma_offset = cpu_to_be32(offset);
	dest.h_rdma_rkey = cpu_to_be32(r_key);

	return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST,
					 &dest, sizeof(dest));
}
EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
2787875e18eSAndy Grover 
279fc445084SAndy Grover /*
280fc445084SAndy Grover  * Each rds_message is allocated with extra space for the scatterlist entries
281fc445084SAndy Grover  * rds ops will need. This is to minimize memory allocation count. Then, each rds op
282fc445084SAndy Grover  * can grab SGs when initializing its part of the rds_message.
283fc445084SAndy Grover  */
rds_message_alloc(unsigned int extra_len,gfp_t gfp)284fc445084SAndy Grover struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
2857875e18eSAndy Grover {
2867875e18eSAndy Grover 	struct rds_message *rm;
2877875e18eSAndy Grover 
288ece6b0a2SCong Wang 	if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message))
289ece6b0a2SCong Wang 		return NULL;
290ece6b0a2SCong Wang 
291fc445084SAndy Grover 	rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
2927875e18eSAndy Grover 	if (!rm)
2937875e18eSAndy Grover 		goto out;
2947875e18eSAndy Grover 
295fc445084SAndy Grover 	rm->m_used_sgs = 0;
296fc445084SAndy Grover 	rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
297fc445084SAndy Grover 
2986c5a1c4aSReshetova, Elena 	refcount_set(&rm->m_refcount, 1);
2997875e18eSAndy Grover 	INIT_LIST_HEAD(&rm->m_sock_item);
3007875e18eSAndy Grover 	INIT_LIST_HEAD(&rm->m_conn_item);
3017875e18eSAndy Grover 	spin_lock_init(&rm->m_rs_lock);
302c83188dcSChris Mason 	init_waitqueue_head(&rm->m_flush_wait);
3037875e18eSAndy Grover 
3047875e18eSAndy Grover out:
3057875e18eSAndy Grover 	return rm;
3067875e18eSAndy Grover }
3077875e18eSAndy Grover 
308fc445084SAndy Grover /*
309fc445084SAndy Grover  * RDS ops use this to grab SG entries from the rm's sg pool.
310fc445084SAndy Grover  */
rds_message_alloc_sgs(struct rds_message * rm,int nents)3117dba9203SJason Gunthorpe struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
312fc445084SAndy Grover {
313fc445084SAndy Grover 	struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
314fc445084SAndy Grover 	struct scatterlist *sg_ret;
315fc445084SAndy Grover 
316c75ab8a5Sshamir rabinovitch 	if (nents <= 0) {
317c75ab8a5Sshamir rabinovitch 		pr_warn("rds: alloc sgs failed! nents <= 0\n");
3187dba9203SJason Gunthorpe 		return ERR_PTR(-EINVAL);
319c75ab8a5Sshamir rabinovitch 	}
320c75ab8a5Sshamir rabinovitch 
321c75ab8a5Sshamir rabinovitch 	if (rm->m_used_sgs + nents > rm->m_total_sgs) {
322c75ab8a5Sshamir rabinovitch 		pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n",
323c75ab8a5Sshamir rabinovitch 			rm->m_total_sgs, rm->m_used_sgs, nents);
3247dba9203SJason Gunthorpe 		return ERR_PTR(-ENOMEM);
325c75ab8a5Sshamir rabinovitch 	}
326c75ab8a5Sshamir rabinovitch 
327fc445084SAndy Grover 	sg_ret = &sg_first[rm->m_used_sgs];
328f4dd96f7SAndy Grover 	sg_init_table(sg_ret, nents);
329fc445084SAndy Grover 	rm->m_used_sgs += nents;
330fc445084SAndy Grover 
331fc445084SAndy Grover 	return sg_ret;
332fc445084SAndy Grover }
333fc445084SAndy Grover 
rds_message_map_pages(unsigned long * page_addrs,unsigned int total_len)3347875e18eSAndy Grover struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
3357875e18eSAndy Grover {
3367875e18eSAndy Grover 	struct rds_message *rm;
3377875e18eSAndy Grover 	unsigned int i;
338eeb2c4fbSJacob Wen 	int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
339ff87e97aSAndy Grover 	int extra_bytes = num_sgs * sizeof(struct scatterlist);
3407875e18eSAndy Grover 
341f2ec76f2SAndy Grover 	rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
3428690bfa1SAndy Grover 	if (!rm)
3437875e18eSAndy Grover 		return ERR_PTR(-ENOMEM);
3447875e18eSAndy Grover 
3457875e18eSAndy Grover 	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
3467875e18eSAndy Grover 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
347eeb2c4fbSJacob Wen 	rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE);
3487dba9203SJason Gunthorpe 	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
3497dba9203SJason Gunthorpe 	if (IS_ERR(rm->data.op_sg)) {
350bdc2ab5cSLv Yunlong 		void *err = ERR_CAST(rm->data.op_sg);
351aa58163aSPavel Emelyanov 		rds_message_put(rm);
352bdc2ab5cSLv Yunlong 		return err;
353aa58163aSPavel Emelyanov 	}
3547875e18eSAndy Grover 
3556c7cc6e4SAndy Grover 	for (i = 0; i < rm->data.op_nents; ++i) {
3566c7cc6e4SAndy Grover 		sg_set_page(&rm->data.op_sg[i],
357a60511cfSLinus Walleij 				virt_to_page((void *)page_addrs[i]),
3587875e18eSAndy Grover 				PAGE_SIZE, 0);
3597875e18eSAndy Grover 	}
3607875e18eSAndy Grover 
3617875e18eSAndy Grover 	return rm;
3627875e18eSAndy Grover }
3637875e18eSAndy Grover 
rds_message_zcopy_from_user(struct rds_message * rm,struct iov_iter * from)364496c7f3cSkbuild test robot static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
3657875e18eSAndy Grover {
3667875e18eSAndy Grover 	struct scatterlist *sg;
367fc445084SAndy Grover 	int ret = 0;
3680cebacceSSowmini Varadhan 	int length = iov_iter_count(from);
3699426bbc6SSowmini Varadhan 	struct rds_msg_zcopy_info *info;
3707875e18eSAndy Grover 
371083735f4SAl Viro 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
3727875e18eSAndy Grover 
3737875e18eSAndy Grover 	/*
3747875e18eSAndy Grover 	 * now allocate and copy in the data payload.
3757875e18eSAndy Grover 	 */
3766c7cc6e4SAndy Grover 	sg = rm->data.op_sg;
3777875e18eSAndy Grover 
3789426bbc6SSowmini Varadhan 	info = kzalloc(sizeof(*info), GFP_KERNEL);
3799426bbc6SSowmini Varadhan 	if (!info)
3800cebacceSSowmini Varadhan 		return -ENOMEM;
3819426bbc6SSowmini Varadhan 	INIT_LIST_HEAD(&info->rs_zcookie_next);
3829426bbc6SSowmini Varadhan 	rm->data.op_mmp_znotifier = &info->znotif;
3830cebacceSSowmini Varadhan 	if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
3840cebacceSSowmini Varadhan 				    length)) {
3850cebacceSSowmini Varadhan 		ret = -ENOMEM;
3860cebacceSSowmini Varadhan 		goto err;
3870cebacceSSowmini Varadhan 	}
3880cebacceSSowmini Varadhan 	while (iov_iter_count(from)) {
3890cebacceSSowmini Varadhan 		struct page *pages;
3900cebacceSSowmini Varadhan 		size_t start;
3910cebacceSSowmini Varadhan 		ssize_t copied;
3920cebacceSSowmini Varadhan 
3931ef255e2SAl Viro 		copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE,
3940cebacceSSowmini Varadhan 					    1, &start);
3950cebacceSSowmini Varadhan 		if (copied < 0) {
3960cebacceSSowmini Varadhan 			struct mmpin *mmp;
3970cebacceSSowmini Varadhan 			int i;
3980cebacceSSowmini Varadhan 
3990cebacceSSowmini Varadhan 			for (i = 0; i < rm->data.op_nents; i++)
4000cebacceSSowmini Varadhan 				put_page(sg_page(&rm->data.op_sg[i]));
4010cebacceSSowmini Varadhan 			mmp = &rm->data.op_mmp_znotifier->z_mmp;
4020cebacceSSowmini Varadhan 			mm_unaccount_pinned_pages(mmp);
4030cebacceSSowmini Varadhan 			ret = -EFAULT;
4040cebacceSSowmini Varadhan 			goto err;
4050cebacceSSowmini Varadhan 		}
4060cebacceSSowmini Varadhan 		length -= copied;
4070cebacceSSowmini Varadhan 		sg_set_page(sg, pages, copied, start);
4080cebacceSSowmini Varadhan 		rm->data.op_nents++;
4090cebacceSSowmini Varadhan 		sg++;
4100cebacceSSowmini Varadhan 	}
4110cebacceSSowmini Varadhan 	WARN_ON_ONCE(length != 0);
4120cebacceSSowmini Varadhan 	return ret;
4130cebacceSSowmini Varadhan err:
4149426bbc6SSowmini Varadhan 	kfree(info);
4150cebacceSSowmini Varadhan 	rm->data.op_mmp_znotifier = NULL;
4160cebacceSSowmini Varadhan 	return ret;
417d40a126bSSowmini Varadhan }
418d40a126bSSowmini Varadhan 
rds_message_copy_from_user(struct rds_message * rm,struct iov_iter * from,bool zcopy)419d40a126bSSowmini Varadhan int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
420d40a126bSSowmini Varadhan 			       bool zcopy)
421d40a126bSSowmini Varadhan {
422d40a126bSSowmini Varadhan 	unsigned long to_copy, nbytes;
423d40a126bSSowmini Varadhan 	unsigned long sg_off;
424d40a126bSSowmini Varadhan 	struct scatterlist *sg;
425d40a126bSSowmini Varadhan 	int ret = 0;
426d40a126bSSowmini Varadhan 
427d40a126bSSowmini Varadhan 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
428d40a126bSSowmini Varadhan 
429d40a126bSSowmini Varadhan 	/* now allocate and copy in the data payload.  */
430d40a126bSSowmini Varadhan 	sg = rm->data.op_sg;
431d40a126bSSowmini Varadhan 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
432d40a126bSSowmini Varadhan 
433d40a126bSSowmini Varadhan 	if (zcopy)
434d40a126bSSowmini Varadhan 		return rds_message_zcopy_from_user(rm, from);
4350cebacceSSowmini Varadhan 
436083735f4SAl Viro 	while (iov_iter_count(from)) {
4378690bfa1SAndy Grover 		if (!sg_page(sg)) {
438083735f4SAl Viro 			ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
4397875e18eSAndy Grover 						       GFP_HIGHUSER);
4407875e18eSAndy Grover 			if (ret)
441083735f4SAl Viro 				return ret;
4426c7cc6e4SAndy Grover 			rm->data.op_nents++;
4437875e18eSAndy Grover 			sg_off = 0;
4447875e18eSAndy Grover 		}
4457875e18eSAndy Grover 
446083735f4SAl Viro 		to_copy = min_t(unsigned long, iov_iter_count(from),
447083735f4SAl Viro 				sg->length - sg_off);
4487875e18eSAndy Grover 
449083735f4SAl Viro 		rds_stats_add(s_copy_from_user, to_copy);
450d0a47d32SSowmini Varadhan 		nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off,
451083735f4SAl Viro 					     to_copy, from);
452d0a47d32SSowmini Varadhan 		if (nbytes != to_copy)
453083735f4SAl Viro 			return -EFAULT;
4547875e18eSAndy Grover 
4557875e18eSAndy Grover 		sg_off += to_copy;
4567875e18eSAndy Grover 
4577875e18eSAndy Grover 		if (sg_off == sg->length)
4587875e18eSAndy Grover 			sg++;
4597875e18eSAndy Grover 	}
4607875e18eSAndy Grover 
461fc445084SAndy Grover 	return ret;
4627875e18eSAndy Grover }
4637875e18eSAndy Grover 
rds_message_inc_copy_to_user(struct rds_incoming * inc,struct iov_iter * to)464c310e72cSAl Viro int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
4657875e18eSAndy Grover {
4667875e18eSAndy Grover 	struct rds_message *rm;
4677875e18eSAndy Grover 	struct scatterlist *sg;
4687875e18eSAndy Grover 	unsigned long to_copy;
4697875e18eSAndy Grover 	unsigned long vec_off;
4707875e18eSAndy Grover 	int copied;
4717875e18eSAndy Grover 	int ret;
4727875e18eSAndy Grover 	u32 len;
4737875e18eSAndy Grover 
4747875e18eSAndy Grover 	rm = container_of(inc, struct rds_message, m_inc);
4757875e18eSAndy Grover 	len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
4767875e18eSAndy Grover 
4776c7cc6e4SAndy Grover 	sg = rm->data.op_sg;
4787875e18eSAndy Grover 	vec_off = 0;
4797875e18eSAndy Grover 	copied = 0;
4807875e18eSAndy Grover 
481c310e72cSAl Viro 	while (iov_iter_count(to) && copied < len) {
4826ff4a8adSGeert Uytterhoeven 		to_copy = min_t(unsigned long, iov_iter_count(to),
4836ff4a8adSGeert Uytterhoeven 				sg->length - vec_off);
4847875e18eSAndy Grover 		to_copy = min_t(unsigned long, to_copy, len - copied);
4857875e18eSAndy Grover 
486c310e72cSAl Viro 		rds_stats_add(s_copy_to_user, to_copy);
487c310e72cSAl Viro 		ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off,
488c310e72cSAl Viro 					to_copy, to);
489c310e72cSAl Viro 		if (ret != to_copy)
490c310e72cSAl Viro 			return -EFAULT;
4917875e18eSAndy Grover 
4927875e18eSAndy Grover 		vec_off += to_copy;
4937875e18eSAndy Grover 		copied += to_copy;
4947875e18eSAndy Grover 
4957875e18eSAndy Grover 		if (vec_off == sg->length) {
4967875e18eSAndy Grover 			vec_off = 0;
4977875e18eSAndy Grover 			sg++;
4987875e18eSAndy Grover 		}
4997875e18eSAndy Grover 	}
5007875e18eSAndy Grover 
5017875e18eSAndy Grover 	return copied;
5027875e18eSAndy Grover }
5037875e18eSAndy Grover 
5047875e18eSAndy Grover /*
5057875e18eSAndy Grover  * If the message is still on the send queue, wait until the transport
5067875e18eSAndy Grover  * is done with it. This is particularly important for RDMA operations.
5077875e18eSAndy Grover  */
rds_message_wait(struct rds_message * rm)5087875e18eSAndy Grover void rds_message_wait(struct rds_message *rm)
5097875e18eSAndy Grover {
510c83188dcSChris Mason 	wait_event_interruptible(rm->m_flush_wait,
5117875e18eSAndy Grover 			!test_bit(RDS_MSG_MAPPED, &rm->m_flags));
5127875e18eSAndy Grover }
5137875e18eSAndy Grover 
rds_message_unmapped(struct rds_message * rm)5147875e18eSAndy Grover void rds_message_unmapped(struct rds_message *rm)
5157875e18eSAndy Grover {
5167875e18eSAndy Grover 	clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
517c83188dcSChris Mason 	wake_up_interruptible(&rm->m_flush_wait);
5187875e18eSAndy Grover }
519616b757aSAndy Grover EXPORT_SYMBOL_GPL(rds_message_unmapped);
520