18ada2c1cSShachar Raindel /* 28ada2c1cSShachar Raindel * Copyright (c) 2014 Mellanox Technologies. All rights reserved. 38ada2c1cSShachar Raindel * 48ada2c1cSShachar Raindel * This software is available to you under a choice of one of two 58ada2c1cSShachar Raindel * licenses. You may choose to be licensed under the terms of the GNU 68ada2c1cSShachar Raindel * General Public License (GPL) Version 2, available from the file 78ada2c1cSShachar Raindel * COPYING in the main directory of this source tree, or the 88ada2c1cSShachar Raindel * OpenIB.org BSD license below: 98ada2c1cSShachar Raindel * 108ada2c1cSShachar Raindel * Redistribution and use in source and binary forms, with or 118ada2c1cSShachar Raindel * without modification, are permitted provided that the following 128ada2c1cSShachar Raindel * conditions are met: 138ada2c1cSShachar Raindel * 148ada2c1cSShachar Raindel * - Redistributions of source code must retain the above 158ada2c1cSShachar Raindel * copyright notice, this list of conditions and the following 168ada2c1cSShachar Raindel * disclaimer. 178ada2c1cSShachar Raindel * 188ada2c1cSShachar Raindel * - Redistributions in binary form must reproduce the above 198ada2c1cSShachar Raindel * copyright notice, this list of conditions and the following 208ada2c1cSShachar Raindel * disclaimer in the documentation and/or other materials 218ada2c1cSShachar Raindel * provided with the distribution. 228ada2c1cSShachar Raindel * 238ada2c1cSShachar Raindel * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248ada2c1cSShachar Raindel * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258ada2c1cSShachar Raindel * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268ada2c1cSShachar Raindel * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278ada2c1cSShachar Raindel * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288ada2c1cSShachar Raindel * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298ada2c1cSShachar Raindel * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308ada2c1cSShachar Raindel * SOFTWARE. 318ada2c1cSShachar Raindel */ 328ada2c1cSShachar Raindel 338ada2c1cSShachar Raindel #ifndef IB_UMEM_ODP_H 348ada2c1cSShachar Raindel #define IB_UMEM_ODP_H 358ada2c1cSShachar Raindel 368ada2c1cSShachar Raindel #include <rdma/ib_umem.h> 37882214e2SHaggai Eran #include <rdma/ib_verbs.h> 38882214e2SHaggai Eran #include <linux/interval_tree.h> 39882214e2SHaggai Eran 408ada2c1cSShachar Raindel struct ib_umem_odp { 4141b4deeaSJason Gunthorpe struct ib_umem umem; 42c9990ab3SJason Gunthorpe struct ib_ucontext_per_mm *per_mm; 43c9990ab3SJason Gunthorpe 448ada2c1cSShachar Raindel /* 458ada2c1cSShachar Raindel * An array of the pages included in the on-demand paging umem. 468ada2c1cSShachar Raindel * Indices of pages that are currently not mapped into the device will 478ada2c1cSShachar Raindel * contain NULL. 488ada2c1cSShachar Raindel */ 498ada2c1cSShachar Raindel struct page **page_list; 508ada2c1cSShachar Raindel /* 518ada2c1cSShachar Raindel * An array of the same size as page_list, with DMA addresses mapped 528ada2c1cSShachar Raindel * for pages the pages in page_list. The lower two bits designate 538ada2c1cSShachar Raindel * access permissions. See ODP_READ_ALLOWED_BIT and 548ada2c1cSShachar Raindel * ODP_WRITE_ALLOWED_BIT. 558ada2c1cSShachar Raindel */ 568ada2c1cSShachar Raindel dma_addr_t *dma_list; 578ada2c1cSShachar Raindel /* 588ada2c1cSShachar Raindel * The umem_mutex protects the page_list and dma_list fields of an ODP 59882214e2SHaggai Eran * umem, allowing only a single thread to map/unmap pages. The mutex 60882214e2SHaggai Eran * also protects access to the mmu notifier counters. 618ada2c1cSShachar Raindel */ 628ada2c1cSShachar Raindel struct mutex umem_mutex; 638ada2c1cSShachar Raindel void *private; /* for the HW driver to use. */ 64882214e2SHaggai Eran 65882214e2SHaggai Eran int notifiers_seq; 66882214e2SHaggai Eran int notifiers_count; 67d10bcf94SShiraz Saleem int npages; 68882214e2SHaggai Eran 69882214e2SHaggai Eran /* Tree tracking */ 707cc2e18fSJason Gunthorpe struct interval_tree_node interval_tree; 71882214e2SHaggai Eran 72*fd7dbf03SJason Gunthorpe /* 73*fd7dbf03SJason Gunthorpe * An implicit odp umem cannot be DMA mapped, has 0 length, and serves 74*fd7dbf03SJason Gunthorpe * only as an anchor for the driver to hold onto the per_mm. FIXME: 75*fd7dbf03SJason Gunthorpe * This should be removed and drivers should work with the per_mm 76*fd7dbf03SJason Gunthorpe * directly. 77*fd7dbf03SJason Gunthorpe */ 78*fd7dbf03SJason Gunthorpe bool is_implicit_odp; 79*fd7dbf03SJason Gunthorpe 80882214e2SHaggai Eran struct completion notifier_completion; 81882214e2SHaggai Eran int dying; 82d2183c6fSJason Gunthorpe unsigned int page_shift; 83d07d1d70SArtemy Kovalyov struct work_struct work; 848ada2c1cSShachar Raindel }; 858ada2c1cSShachar Raindel 86b5231b01SJason Gunthorpe static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) 87b5231b01SJason Gunthorpe { 8841b4deeaSJason Gunthorpe return container_of(umem, struct ib_umem_odp, umem); 89b5231b01SJason Gunthorpe } 90b5231b01SJason Gunthorpe 91d2183c6fSJason Gunthorpe /* Returns the first page of an ODP umem. */ 92d2183c6fSJason Gunthorpe static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) 93d2183c6fSJason Gunthorpe { 94d2183c6fSJason Gunthorpe return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift); 95d2183c6fSJason Gunthorpe } 96d2183c6fSJason Gunthorpe 97d2183c6fSJason Gunthorpe /* Returns the address of the page after the last one of an ODP umem. */ 98d2183c6fSJason Gunthorpe static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) 99d2183c6fSJason Gunthorpe { 100d2183c6fSJason Gunthorpe return ALIGN(umem_odp->umem.address + umem_odp->umem.length, 101d2183c6fSJason Gunthorpe 1UL << umem_odp->page_shift); 102d2183c6fSJason Gunthorpe } 103d2183c6fSJason Gunthorpe 104d2183c6fSJason Gunthorpe static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) 105d2183c6fSJason Gunthorpe { 106d2183c6fSJason Gunthorpe return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >> 107d2183c6fSJason Gunthorpe umem_odp->page_shift; 108d2183c6fSJason Gunthorpe } 109d2183c6fSJason Gunthorpe 11013859d5dSLeon Romanovsky /* 11113859d5dSLeon Romanovsky * The lower 2 bits of the DMA address signal the R/W permissions for 11213859d5dSLeon Romanovsky * the entry. To upgrade the permissions, provide the appropriate 11313859d5dSLeon Romanovsky * bitmask to the map_dma_pages function. 11413859d5dSLeon Romanovsky * 11513859d5dSLeon Romanovsky * Be aware that upgrading a mapped address might result in change of 11613859d5dSLeon Romanovsky * the DMA address for the page. 11713859d5dSLeon Romanovsky */ 11813859d5dSLeon Romanovsky #define ODP_READ_ALLOWED_BIT (1<<0ULL) 11913859d5dSLeon Romanovsky #define ODP_WRITE_ALLOWED_BIT (1<<1ULL) 12013859d5dSLeon Romanovsky 12113859d5dSLeon Romanovsky #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) 12213859d5dSLeon Romanovsky 1238ada2c1cSShachar Raindel #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1248ada2c1cSShachar Raindel 125f27a0d50SJason Gunthorpe struct ib_ucontext_per_mm { 126f27a0d50SJason Gunthorpe struct ib_ucontext *context; 127f27a0d50SJason Gunthorpe struct mm_struct *mm; 128f27a0d50SJason Gunthorpe struct pid *tgid; 129be7a57b4SJason Gunthorpe bool active; 130f27a0d50SJason Gunthorpe 131f27a0d50SJason Gunthorpe struct rb_root_cached umem_tree; 132f27a0d50SJason Gunthorpe /* Protects umem_tree */ 133f27a0d50SJason Gunthorpe struct rw_semaphore umem_rwsem; 134f27a0d50SJason Gunthorpe 135f27a0d50SJason Gunthorpe struct mmu_notifier mn; 136f27a0d50SJason Gunthorpe unsigned int odp_mrs_count; 137f27a0d50SJason Gunthorpe 138f27a0d50SJason Gunthorpe struct list_head ucontext_list; 13956ac9dd9SJason Gunthorpe struct rcu_head rcu; 140f27a0d50SJason Gunthorpe }; 141f27a0d50SJason Gunthorpe 14241b4deeaSJason Gunthorpe int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); 143da6a496aSMoni Shoua struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem, 144b5231b01SJason Gunthorpe unsigned long addr, size_t size); 145b5231b01SJason Gunthorpe void ib_umem_odp_release(struct ib_umem_odp *umem_odp); 1468ada2c1cSShachar Raindel 147b5231b01SJason Gunthorpe int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, 148b5231b01SJason Gunthorpe u64 bcnt, u64 access_mask, 149b5231b01SJason Gunthorpe unsigned long current_seq); 1508ada2c1cSShachar Raindel 151b5231b01SJason Gunthorpe void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, 1528ada2c1cSShachar Raindel u64 bound); 1538ada2c1cSShachar Raindel 154b5231b01SJason Gunthorpe typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, 155882214e2SHaggai Eran void *cookie); 156882214e2SHaggai Eran /* 157882214e2SHaggai Eran * Call the callback on each ib_umem in the range. Returns the logical or of 158882214e2SHaggai Eran * the return values of the functions called. 159882214e2SHaggai Eran */ 160f808c13fSDavidlohr Bueso int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, 161f808c13fSDavidlohr Bueso u64 start, u64 end, 16293065ac7SMichal Hocko umem_call_back cb, 16393065ac7SMichal Hocko bool blockable, void *cookie); 164882214e2SHaggai Eran 165d07d1d70SArtemy Kovalyov /* 166d07d1d70SArtemy Kovalyov * Find first region intersecting with address range. 167d07d1d70SArtemy Kovalyov * Return NULL if not found 168d07d1d70SArtemy Kovalyov */ 1697cc2e18fSJason Gunthorpe static inline struct ib_umem_odp * 1707cc2e18fSJason Gunthorpe rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length) 1717cc2e18fSJason Gunthorpe { 1727cc2e18fSJason Gunthorpe struct interval_tree_node *node; 1737cc2e18fSJason Gunthorpe 1747cc2e18fSJason Gunthorpe node = interval_tree_iter_first(root, addr, addr + length - 1); 1757cc2e18fSJason Gunthorpe if (!node) 1767cc2e18fSJason Gunthorpe return NULL; 1777cc2e18fSJason Gunthorpe return container_of(node, struct ib_umem_odp, interval_tree); 1787cc2e18fSJason Gunthorpe 1797cc2e18fSJason Gunthorpe } 180882214e2SHaggai Eran 181b5231b01SJason Gunthorpe static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, 182882214e2SHaggai Eran unsigned long mmu_seq) 183882214e2SHaggai Eran { 184882214e2SHaggai Eran /* 185882214e2SHaggai Eran * This code is strongly based on the KVM code from 186882214e2SHaggai Eran * mmu_notifier_retry. Should be called with 187b5231b01SJason Gunthorpe * the relevant locks taken (umem_odp->umem_mutex 188882214e2SHaggai Eran * and the ucontext umem_mutex semaphore locked for read). 189882214e2SHaggai Eran */ 190882214e2SHaggai Eran 191b5231b01SJason Gunthorpe if (unlikely(umem_odp->notifiers_count)) 192882214e2SHaggai Eran return 1; 193b5231b01SJason Gunthorpe if (umem_odp->notifiers_seq != mmu_seq) 194882214e2SHaggai Eran return 1; 195882214e2SHaggai Eran return 0; 196882214e2SHaggai Eran } 197882214e2SHaggai Eran 1988ada2c1cSShachar Raindel #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 1998ada2c1cSShachar Raindel 20041b4deeaSJason Gunthorpe static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) 2018ada2c1cSShachar Raindel { 2028ada2c1cSShachar Raindel return -EINVAL; 2038ada2c1cSShachar Raindel } 2048ada2c1cSShachar Raindel 205b5231b01SJason Gunthorpe static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} 2068ada2c1cSShachar Raindel 2078ada2c1cSShachar Raindel #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 2088ada2c1cSShachar Raindel 2098ada2c1cSShachar Raindel #endif /* IB_UMEM_ODP_H */ 210