// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/*
 * STag lookup is based on its index part only (24 bits).
 * The code avoids the special STag of zero and tries to randomize
 * STag values between 1 and SIW_STAG_MAX_INDEX.
 */
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
	    GFP_KERNEL) < 0)
		return -ENOMEM;

	/* Set the STag index part */
	m->stag = id << 8;

	siw_dbg_mem(m, "new MEM object\n");

	return 0;
}
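
/*
 * Illustration (not driver code): the 32-bit STag is composed of the
 * 24-bit index allocated above and an 8-bit key in the low byte.
 * A minimal sketch of the composition, assuming a hypothetical
 * consumer-chosen key value:
 *
 *	u32 stag_index = m->stag >> 8;	// 24-bit xarray id
 *	u8 stag_key = 0x5a;		// hypothetical key value
 *	u32 stag = (stag_index << 8) | stag_key;
 *
 * Lookup is done on (stag >> 8) only, so all key values resolve to
 * the same memory object.
 */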

/*
 * siw_mem_id2obj()
 *
 * Resolves the memory object referenced by the given STag index.
 * Might be called from:
 * o process context, before sending out an SGL, or
 * o softirq context, when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
	struct siw_mem *mem;

	rcu_read_lock();
	mem = xa_load(&sdev->mem_xa, stag_index);
	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
		rcu_read_unlock();
		return mem;
	}
	rcu_read_unlock();

	return NULL;
}
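
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * successful lookup returns with an elevated refcount which must be
 * dropped via siw_mem_put() once the object is no longer used:
 *
 *	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
 *
 *	if (mem) {
 *		// ... access mem under the taken reference ...
 *		siw_mem_put(mem);
 *	}
 *
 * Taking the kref under rcu_read_lock() ensures the object cannot
 * be freed between xa_load() and kref_get_unless_zero().
 */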

static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
			   bool dirty)
{
	unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
}

void siw_umem_release(struct siw_umem *umem, bool dirty)
{
	struct mm_struct *mm_s = umem->owning_mm;
	int i, num_pages = umem->num_pages;

	for (i = 0; num_pages; i++) {
		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);

		siw_free_plist(&umem->page_chunk[i], to_free,
			       umem->writable && dirty);
		kfree(umem->page_chunk[i].plist);
		num_pages -= to_free;
	}
	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

	mmdrop(mm_s);
	kfree(umem->page_chunk);
	kfree(umem);
}
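
/*
 * Example of the chunked accounting above (illustrative numbers only;
 * the real PAGES_PER_CHUNK value is defined elsewhere in the driver):
 * assuming PAGES_PER_CHUNK == 512 and umem->num_pages == 1030, the
 * loop frees three chunks:
 *
 *	i = 0: to_free = min_t(int, 512, 1030) = 512
 *	i = 1: to_free = min_t(int, 512, 518)  = 512
 *	i = 2: to_free = min_t(int, 512, 6)    = 6
 */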

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
	mem->stag_valid = 0;
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
	    GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}

	mr->mem = mem;
	/* Set the STag index part */
	mem->stag = id << 8;
	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

	return 0;
}
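
/*
 * Hedged caller sketch (hypothetical; actual callers live in the
 * verbs layer): a registration path attaches the memory and marks
 * the STag usable only after all setup has completed:
 *
 *	rv = siw_mr_add_mem(mr, pd, umem, start, len, rights);
 *	if (rv)
 *		return rv;
 *	mr->mem->stag_valid = 1;
 *
 * The object is findable in sdev->mem_xa as soon as
 * xa_alloc_cyclic() succeeds, but stays unusable until stag_valid
 * is set.
 */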

void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalid visible asap */
	smp_mb();

	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

	if (!mem->is_mw && mem->mem_obj) {
		if (mem->is_pbl == 0)
			siw_umem_release(mem->umem, true);
		else
			kfree(mem->pbl);
	}
	kfree(mem);
}
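
/*
 * siw_free_mem() is the kref release callback and runs only via the
 * reference counter. Sketch of the assumed companion helper (expected
 * in siw_mem.h) that drops a reference:
 *
 *	static inline void siw_mem_put(struct siw_mem *mem)
 *	{
 *		kref_put(&mem->ref, siw_free_mem);
 *	}
 */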

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd:		Protection Domain memory should belong to
 * @mem:	memory to be checked
 * @addr:	starting addr of mem
 * @perms:	requested access permissions
 * @len:	len of memory interval to be checked
 *
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/*
	 * check access permissions
	 */
	if ((mem->perms & perms) < perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/*
	 * Check if access falls into valid memory interval.
	 */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}
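
/*
 * The permission test above exploits that ib_access_flags is a
 * bitmask: (mem->perms & perms) < perms holds exactly when at least
 * one requested bit is missing. Worked example (flag values from
 * <rdma/ib_verbs.h>):
 *
 *	mem->perms = IB_ACCESS_LOCAL_WRITE;		// 0x1
 *	perms = IB_ACCESS_LOCAL_WRITE |
 *		IB_ACCESS_REMOTE_WRITE;			// 0x3
 *	// (0x1 & 0x3) == 0x1 < 0x3 -> -E_ACCESS_PERM
 */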

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd:		Protection Domain memory should belong to
 * @sge:	SGE to be checked
 * @mem:	location of memory reference within array
 * @perms:	requested access permissions
 * @off:	starting offset in SGE
 * @len:	len of memory interval to be checked
 *
 * NOTE: Function references SGE's memory object (mem->obj)
 * if not yet done. The new reference is kept if the check went ok and
 * released if the check failed. If mem->obj is already valid, no new
 * lookup is done and mem is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv = E_ACCESS_OK;

	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
	if (*mem == NULL) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* Check if user re-registered with different STag key */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}
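
/*
 * Hypothetical usage sketch: callers keep one struct siw_mem * slot
 * per SGE and pass its address, so a resolved object is cached
 * across partial transfers of the same SGE:
 *
 *	struct siw_mem **mem = &wqe->mem[sge_idx];	// assumed layout
 *
 *	rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
 *			   sge_off, bytes);
 *	if (rv < 0)
 *		return rv;	// *mem stays NULL if a fresh lookup failed
 */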

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
	int rv = 0;

	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per RDMA verbs definition, an STag may already be in invalid
	 * state if invalidation is requested. So no state check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}
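
/*
 * Assumed caller context (sketch only, names hypothetical): the
 * receive path handling a Send-with-Invalidate passes the peer
 * supplied STag through unchanged:
 *
 *	rv = siw_invalidate_stag(qp->pd, remote_inv_stag);
 *	if (rv)
 *		// ... report a processing error ...
 */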

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
	int i = idx ? *idx : 0;

	while (i < pbl->num_buf) {
		struct siw_pble *pble = &pbl->pbe[i];

		if (pble->pbl_off + pble->size > off) {
			u64 pble_off = off - pble->pbl_off;

			if (len)
				*len = pble->size - pble_off;
			if (idx)
				*idx = i;

			return pble->addr + pble_off;
		}
		i++;
	}
	if (len)
		*len = 0;
	return 0;
}
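
/*
 * Walk sketch (illustration only): consuming a region spread over
 * variably sized PBL elements, resuming each lookup at the element
 * where the previous one stopped:
 *
 *	int idx = 0, len;
 *	u64 off = 0;
 *
 *	while (bytes_left) {
 *		dma_addr_t pa = siw_pbl_get_buffer(pbl, off, &len, &idx);
 *
 *		if (!pa)
 *			break;			// offset past end of list
 *		len = min(len, bytes_left);
 *		// ... use [pa, pa + len) ...
 *		off += len;
 *		bytes_left -= len;
 *	}
 */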

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (num_buf == 0)
		return ERR_PTR(-EINVAL);

	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;

	return pbl;
}

struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
	struct siw_umem *umem;
	struct mm_struct *mm_s;
	u64 first_page_va;
	unsigned long mlock_limit;
	unsigned int foll_flags = FOLL_LONGTERM;
	int num_pages, num_chunks, i, rv = 0;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	mm_s = current->mm;
	umem->owning_mm = mm_s;
	umem->writable = writable;

	mmgrab(mm_s);

	if (writable)
		foll_flags |= FOLL_WRITE;

	mmap_read_lock(mm_s);

	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	umem->fp_addr = first_page_va;

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	for (i = 0; num_pages; i++) {
		int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
		struct page **plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

		if (!plist) {
			rv = -ENOMEM;
			goto out_sem_up;
		}
		umem->page_chunk[i].plist = plist;
		while (nents) {
			rv = pin_user_pages(first_page_va, nents, foll_flags,
					    plist);
			if (rv < 0)
				goto out_sem_up;

			umem->num_pages += rv;
			first_page_va += rv * PAGE_SIZE;
			plist += rv;
			nents -= rv;
			num_pages -= rv;
		}
	}
out_sem_up:
	mmap_read_unlock(mm_s);

	if (rv > 0)
		return umem;

	/* Adjust accounting for pages not pinned */
	if (num_pages)
		atomic64_sub(num_pages, &mm_s->pinned_vm);

	siw_umem_release(umem, false);

	return ERR_PTR(rv);
}
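
/*
 * Pairing sketch (hypothetical caller, e.g. a user MR registration
 * path): a umem obtained here must eventually be returned through
 * siw_umem_release(), which also reverts the pinned_vm accounting:
 *
 *	struct siw_umem *umem = siw_umem_get(start, len, writable);
 *
 *	if (IS_ERR(umem))
 *		return PTR_ERR(umem);
 *	// ...
 *	siw_umem_release(umem, true);	// dirty the pages if written
 */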