18ada2c1cSShachar Raindel /* 28ada2c1cSShachar Raindel * Copyright (c) 2014 Mellanox Technologies. All rights reserved. 38ada2c1cSShachar Raindel * 48ada2c1cSShachar Raindel * This software is available to you under a choice of one of two 58ada2c1cSShachar Raindel * licenses. You may choose to be licensed under the terms of the GNU 68ada2c1cSShachar Raindel * General Public License (GPL) Version 2, available from the file 78ada2c1cSShachar Raindel * COPYING in the main directory of this source tree, or the 88ada2c1cSShachar Raindel * OpenIB.org BSD license below: 98ada2c1cSShachar Raindel * 108ada2c1cSShachar Raindel * Redistribution and use in source and binary forms, with or 118ada2c1cSShachar Raindel * without modification, are permitted provided that the following 128ada2c1cSShachar Raindel * conditions are met: 138ada2c1cSShachar Raindel * 148ada2c1cSShachar Raindel * - Redistributions of source code must retain the above 158ada2c1cSShachar Raindel * copyright notice, this list of conditions and the following 168ada2c1cSShachar Raindel * disclaimer. 178ada2c1cSShachar Raindel * 188ada2c1cSShachar Raindel * - Redistributions in binary form must reproduce the above 198ada2c1cSShachar Raindel * copyright notice, this list of conditions and the following 208ada2c1cSShachar Raindel * disclaimer in the documentation and/or other materials 218ada2c1cSShachar Raindel * provided with the distribution. 228ada2c1cSShachar Raindel * 238ada2c1cSShachar Raindel * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248ada2c1cSShachar Raindel * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258ada2c1cSShachar Raindel * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268ada2c1cSShachar Raindel * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278ada2c1cSShachar Raindel * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288ada2c1cSShachar Raindel * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298ada2c1cSShachar Raindel * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308ada2c1cSShachar Raindel * SOFTWARE. 318ada2c1cSShachar Raindel */ 328ada2c1cSShachar Raindel 338ada2c1cSShachar Raindel #ifndef IB_UMEM_ODP_H 348ada2c1cSShachar Raindel #define IB_UMEM_ODP_H 358ada2c1cSShachar Raindel 368ada2c1cSShachar Raindel #include <rdma/ib_umem.h> 37*882214e2SHaggai Eran #include <rdma/ib_verbs.h> 38*882214e2SHaggai Eran #include <linux/interval_tree.h> 39*882214e2SHaggai Eran 40*882214e2SHaggai Eran struct umem_odp_node { 41*882214e2SHaggai Eran u64 __subtree_last; 42*882214e2SHaggai Eran struct rb_node rb; 43*882214e2SHaggai Eran }; 448ada2c1cSShachar Raindel 458ada2c1cSShachar Raindel struct ib_umem_odp { 468ada2c1cSShachar Raindel /* 478ada2c1cSShachar Raindel * An array of the pages included in the on-demand paging umem. 488ada2c1cSShachar Raindel * Indices of pages that are currently not mapped into the device will 498ada2c1cSShachar Raindel * contain NULL. 508ada2c1cSShachar Raindel */ 518ada2c1cSShachar Raindel struct page **page_list; 528ada2c1cSShachar Raindel /* 538ada2c1cSShachar Raindel * An array of the same size as page_list, with DMA addresses mapped 548ada2c1cSShachar Raindel * for pages the pages in page_list. The lower two bits designate 558ada2c1cSShachar Raindel * access permissions. See ODP_READ_ALLOWED_BIT and 568ada2c1cSShachar Raindel * ODP_WRITE_ALLOWED_BIT. 578ada2c1cSShachar Raindel */ 588ada2c1cSShachar Raindel dma_addr_t *dma_list; 598ada2c1cSShachar Raindel /* 608ada2c1cSShachar Raindel * The umem_mutex protects the page_list and dma_list fields of an ODP 61*882214e2SHaggai Eran * umem, allowing only a single thread to map/unmap pages. The mutex 62*882214e2SHaggai Eran * also protects access to the mmu notifier counters. 638ada2c1cSShachar Raindel */ 648ada2c1cSShachar Raindel struct mutex umem_mutex; 658ada2c1cSShachar Raindel void *private; /* for the HW driver to use. */ 66*882214e2SHaggai Eran 67*882214e2SHaggai Eran /* When false, use the notifier counter in the ucontext struct. */ 68*882214e2SHaggai Eran bool mn_counters_active; 69*882214e2SHaggai Eran int notifiers_seq; 70*882214e2SHaggai Eran int notifiers_count; 71*882214e2SHaggai Eran 72*882214e2SHaggai Eran /* A linked list of umems that don't have private mmu notifier 73*882214e2SHaggai Eran * counters yet. */ 74*882214e2SHaggai Eran struct list_head no_private_counters; 75*882214e2SHaggai Eran struct ib_umem *umem; 76*882214e2SHaggai Eran 77*882214e2SHaggai Eran /* Tree tracking */ 78*882214e2SHaggai Eran struct umem_odp_node interval_tree; 79*882214e2SHaggai Eran 80*882214e2SHaggai Eran struct completion notifier_completion; 81*882214e2SHaggai Eran int dying; 828ada2c1cSShachar Raindel }; 838ada2c1cSShachar Raindel 848ada2c1cSShachar Raindel #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 858ada2c1cSShachar Raindel 868ada2c1cSShachar Raindel int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem); 878ada2c1cSShachar Raindel 888ada2c1cSShachar Raindel void ib_umem_odp_release(struct ib_umem *umem); 898ada2c1cSShachar Raindel 908ada2c1cSShachar Raindel /* 918ada2c1cSShachar Raindel * The lower 2 bits of the DMA address signal the R/W permissions for 928ada2c1cSShachar Raindel * the entry. To upgrade the permissions, provide the appropriate 938ada2c1cSShachar Raindel * bitmask to the map_dma_pages function. 948ada2c1cSShachar Raindel * 958ada2c1cSShachar Raindel * Be aware that upgrading a mapped address might result in change of 968ada2c1cSShachar Raindel * the DMA address for the page. 978ada2c1cSShachar Raindel */ 988ada2c1cSShachar Raindel #define ODP_READ_ALLOWED_BIT (1<<0ULL) 998ada2c1cSShachar Raindel #define ODP_WRITE_ALLOWED_BIT (1<<1ULL) 1008ada2c1cSShachar Raindel 1018ada2c1cSShachar Raindel #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) 1028ada2c1cSShachar Raindel 1038ada2c1cSShachar Raindel int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, 1048ada2c1cSShachar Raindel u64 access_mask, unsigned long current_seq); 1058ada2c1cSShachar Raindel 1068ada2c1cSShachar Raindel void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, 1078ada2c1cSShachar Raindel u64 bound); 1088ada2c1cSShachar Raindel 109*882214e2SHaggai Eran void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); 110*882214e2SHaggai Eran void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); 111*882214e2SHaggai Eran typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, 112*882214e2SHaggai Eran void *cookie); 113*882214e2SHaggai Eran /* 114*882214e2SHaggai Eran * Call the callback on each ib_umem in the range. Returns the logical or of 115*882214e2SHaggai Eran * the return values of the functions called. 116*882214e2SHaggai Eran */ 117*882214e2SHaggai Eran int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, 118*882214e2SHaggai Eran umem_call_back cb, void *cookie); 119*882214e2SHaggai Eran 120*882214e2SHaggai Eran struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root, 121*882214e2SHaggai Eran u64 start, u64 last); 122*882214e2SHaggai Eran struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node, 123*882214e2SHaggai Eran u64 start, u64 last); 124*882214e2SHaggai Eran 125*882214e2SHaggai Eran static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, 126*882214e2SHaggai Eran unsigned long mmu_seq) 127*882214e2SHaggai Eran { 128*882214e2SHaggai Eran /* 129*882214e2SHaggai Eran * This code is strongly based on the KVM code from 130*882214e2SHaggai Eran * mmu_notifier_retry. Should be called with 131*882214e2SHaggai Eran * the relevant locks taken (item->odp_data->umem_mutex 132*882214e2SHaggai Eran * and the ucontext umem_mutex semaphore locked for read). 133*882214e2SHaggai Eran */ 134*882214e2SHaggai Eran 135*882214e2SHaggai Eran /* Do not allow page faults while the new ib_umem hasn't seen a state 136*882214e2SHaggai Eran * with zero notifiers yet, and doesn't have its own valid set of 137*882214e2SHaggai Eran * private counters. */ 138*882214e2SHaggai Eran if (!item->odp_data->mn_counters_active) 139*882214e2SHaggai Eran return 1; 140*882214e2SHaggai Eran 141*882214e2SHaggai Eran if (unlikely(item->odp_data->notifiers_count)) 142*882214e2SHaggai Eran return 1; 143*882214e2SHaggai Eran if (item->odp_data->notifiers_seq != mmu_seq) 144*882214e2SHaggai Eran return 1; 145*882214e2SHaggai Eran return 0; 146*882214e2SHaggai Eran } 147*882214e2SHaggai Eran 1488ada2c1cSShachar Raindel #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 1498ada2c1cSShachar Raindel 1508ada2c1cSShachar Raindel static inline int ib_umem_odp_get(struct ib_ucontext *context, 1518ada2c1cSShachar Raindel struct ib_umem *umem) 1528ada2c1cSShachar Raindel { 1538ada2c1cSShachar Raindel return -EINVAL; 1548ada2c1cSShachar Raindel } 1558ada2c1cSShachar Raindel 1568ada2c1cSShachar Raindel static inline void ib_umem_odp_release(struct ib_umem *umem) {} 1578ada2c1cSShachar Raindel 1588ada2c1cSShachar Raindel #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 1598ada2c1cSShachar Raindel 1608ada2c1cSShachar Raindel #endif /* IB_UMEM_ODP_H */ 161