// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
				    const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
					   unsigned long, unsigned long);
static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
	.invalidate_range_start = mmu_notifier_range_start,
};

INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
	return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
	return PAGE_ALIGN(node->addr + node->len) - 1;
}

int hfi1_mmu_rb_register(void *ops_arg,
			 struct mmu_rb_ops *ops,
			 struct workqueue_struct *wq,
			 struct mmu_rb_handler **handler)
{
	struct mmu_rb_handler *h;
	void *free_ptr;
	int ret;

	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
	if (!free_ptr)
		return -ENOMEM;

	h = PTR_ALIGN(free_ptr, cache_line_size());
	h->root = RB_ROOT_CACHED;
	h->ops = ops;
	h->ops_arg = ops_arg;
	INIT_HLIST_NODE(&h->mn.hlist);
	spin_lock_init(&h->lock);
	h->mn.ops = &mn_opts;
	INIT_WORK(&h->del_work, handle_remove);
	INIT_LIST_HEAD(&h->del_list);
	INIT_LIST_HEAD(&h->lru_list);
	h->wq = wq;
	h->free_ptr = free_ptr;

	ret = mmu_notifier_register(&h->mn, current->mm);
	if (ret) {
		kfree(free_ptr);
		return ret;
	}

	*handler = h;
	return 0;
}

void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
	struct mmu_rb_node *rbnode;
	struct rb_node *node;
	unsigned long flags;
	struct list_head del_list;

	/* Prevent freeing of mm until we are completely finished. */
	mmgrab(handler->mn.mm);

	/* Unregister first so we don't get any more notifications. */
	mmu_notifier_unregister(&handler->mn, handler->mn.mm);

	/*
	 * Make sure the wq delete handler is finished running. It will not
	 * be triggered once the mmu notifiers are unregistered above.
	 */
	flush_work(&handler->del_work);

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	while ((node = rb_first_cached(&handler->root))) {
		rbnode = rb_entry(node, struct mmu_rb_node, node);
		rb_erase_cached(node, &handler->root);
		/* move from LRU list to delete list */
		list_move(&rbnode->list, &del_list);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&rbnode->list);
		kref_put(&rbnode->refcount, release_immediate);
	}

	/* Now the mm may be freed. */
	mmdrop(handler->mn.mm);

	kfree(handler->free_ptr);
}

int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
		       struct mmu_rb_node *mnode)
{
	struct mmu_rb_node *node;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_mmu_rb_insert(mnode);

	if (current->mm != handler->mn.mm)
		return -EPERM;

	spin_lock_irqsave(&handler->lock, flags);
	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
	if (node) {
		ret = -EEXIST;
		goto unlock;
	}
	__mmu_int_rb_insert(mnode, &handler->root);
	list_add_tail(&mnode->list, &handler->lru_list);
	mnode->handler = handler;
unlock:
	spin_unlock_irqrestore(&handler->lock, flags);
	return ret;
}

/* Caller must hold handler lock */
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
					  unsigned long addr, unsigned long len)
{
	struct mmu_rb_node *node;

	trace_hfi1_mmu_rb_search(addr, len);
	node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
	if (node)
		list_move_tail(&node->list, &handler->lru_list);
	return node;
}

/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
					   unsigned long addr,
					   unsigned long len)
{
	struct mmu_rb_node *node = NULL;

	trace_hfi1_mmu_rb_search(addr, len);
	if (!handler->ops->filter) {
		node = __mmu_int_rb_iter_first(&handler->root, addr,
					       (addr + len) - 1);
	} else {
		for (node = __mmu_int_rb_iter_first(&handler->root, addr,
						    (addr + len) - 1);
		     node;
		     node = __mmu_int_rb_iter_next(node, addr,
						   (addr + len) - 1)) {
			if (handler->ops->filter(node, addr, len))
				return node;
		}
	}
	return node;
}

/*
 * Must NOT call while holding mnode->handler->lock.
 * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
 * spinlock.
 */
static void release_immediate(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	trace_hfi1_mmu_release_node(mnode);
	mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
}

/* Caller must hold mnode->handler->lock */
static void release_nolock(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	list_move(&mnode->list, &mnode->handler->del_list);
	queue_work(mnode->handler->wq, &mnode->handler->del_work);
}
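/*
 * Refcount release callbacks for struct mmu_rb_node->refcount, summarized
 * by locking context:
 *
 *   release_immediate()   - invokes ops->remove() directly; ops->remove()
 *                           may sleep, so handler->lock must NOT be held.
 *   release_nolock()      - for callers already holding handler->lock;
 *                           defers ops->remove() to handler->wq via del_list.
 *   hfi1_mmu_rb_release() - acquires handler->lock itself before deferring
 *                           to handler->wq; must not be called with
 *                           handler->lock held.
 */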
/*
 * struct mmu_rb_node->refcount kref_put() callback.
 * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
 * handler->del_work on handler->wq.
 * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
 * Acquires mmu_rb_node->handler->lock; do not call while already holding
 * handler->lock.
 */
void hfi1_mmu_rb_release(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	struct mmu_rb_handler *handler = mnode->handler;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	list_move(&mnode->list, &mnode->handler->del_list);
	spin_unlock_irqrestore(&handler->lock, flags);
	queue_work(handler->wq, &handler->del_work);
}

void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
	struct mmu_rb_node *rbnode, *ptr;
	struct list_head del_list;
	unsigned long flags;
	bool stop = false;

	if (current->mm != handler->mn.mm)
		return;

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
		/* refcount == 1 implies mmu_rb_handler has only rbnode ref */
		if (kref_read(&rbnode->refcount) > 1)
			continue;

		if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
					&stop)) {
			__mmu_int_rb_remove(rbnode, &handler->root);
			/* move from LRU list to delete list */
			list_move(&rbnode->list, &del_list);
		}
		if (stop)
			break;
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
		trace_hfi1_mmu_rb_evict(rbnode);
		kref_put(&rbnode->refcount, release_immediate);
	}
}

static int mmu_notifier_range_start(struct mmu_notifier *mn,
				    const struct mmu_notifier_range *range)
{
	struct mmu_rb_handler *handler =
		container_of(mn, struct mmu_rb_handler, mn);
	struct rb_root_cached *root = &handler->root;
	struct mmu_rb_node *node, *ptr = NULL;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	for (node = __mmu_int_rb_iter_first(root, range->start, range->end - 1);
	     node; node = ptr) {
		/* Guard against node removal. */
		ptr = __mmu_int_rb_iter_next(node, range->start,
					     range->end - 1);
		trace_hfi1_mmu_mem_invalidate(node);
		/* Remove from rb tree and lru_list. */
		__mmu_int_rb_remove(node, root);
		list_del_init(&node->list);
		kref_put(&node->refcount, release_nolock);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	return 0;
}

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed. The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
	struct mmu_rb_handler *handler = container_of(work,
						      struct mmu_rb_handler,
						      del_work);
	struct list_head del_list;
	unsigned long flags;
	struct mmu_rb_node *node;

	/* remove anything that is queued to get removed */
	spin_lock_irqsave(&handler->lock, flags);
	list_replace_init(&handler->del_list, &del_list);
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		node = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&node->list);
		trace_hfi1_mmu_release_node(node);
		handler->ops->remove(handler->ops_arg, node);
	}
}
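/*
 * Illustrative usage sketch, kept under #if 0 so it is never built. It shows
 * how a hypothetical caller might drive the interval-tree cache above:
 * register a handler against current->mm, insert a node describing a pinned
 * user buffer, and tear everything down. The my_* names and callback bodies
 * are assumptions made for illustration only; the callback prototypes are
 * inferred from the call sites in this file and from mmu_rb.h, so treat the
 * exact signatures as assumptions rather than a definitive reference. Real
 * users of this API live elsewhere in the hfi1 driver and typically embed
 * struct mmu_rb_node in a larger, driver-specific structure.
 */
#if 0
static bool my_filter(struct mmu_rb_node *node, unsigned long addr,
		      unsigned long len)
{
	/* Accept any node that overlaps the queried range. */
	return true;
}

static int my_evict(void *ops_arg, struct mmu_rb_node *node, void *evict_arg,
		    bool *stop)
{
	/* Nonzero tells hfi1_mmu_rb_evict() to remove this node. */
	return 1;
}

static void my_remove(void *ops_arg, struct mmu_rb_node *node)
{
	/* Unpin/free the node here; this callback may sleep. */
	kfree(node);
}

static struct mmu_rb_ops my_ops = {
	.filter = my_filter,
	.evict = my_evict,
	.remove = my_remove,
};

/* Must run in the process context of the user task owning the buffer. */
static int my_cache_example(struct workqueue_struct *wq, unsigned long addr,
			    unsigned long len)
{
	struct mmu_rb_handler *handler;
	struct mmu_rb_node *node;
	int ret;

	/* Registers an MMU notifier against current->mm. */
	ret = hfi1_mmu_rb_register(NULL, &my_ops, wq, &handler);
	if (ret)
		return ret;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node) {
		ret = -ENOMEM;
		goto out;
	}

	node->addr = addr;		/* user virtual address of pinned buffer */
	node->len = len;
	kref_init(&node->refcount);	/* reference owned by the handler */

	ret = hfi1_mmu_rb_insert(handler, node);
	if (ret) {
		kfree(node);
		goto out;
	}

	/*
	 * ... use the cache; hfi1_mmu_rb_get_first() requires handler->lock
	 * to be held by the caller ...
	 */

out:
	/* Removes every remaining node via ->remove(), then frees handler. */
	hfi1_mmu_rb_unregister(handler);
	return ret;
}
#endif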