/*
 * linux/mm/mmu_notifier.c
 *
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright (C) 2008 SGI
 *             Christoph Lameter <clameter@sgi.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>

/* global SRCU for all MMs */
static struct srcu_struct srcu;

/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to SRCU and it serializes
 * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
void __mmu_notifier_release(struct mm_struct *mm)
{
        struct mmu_notifier *mn;
        int id;

        /*
         * SRCU here will block mmu_notifier_unregister until
         * ->release returns.
         */
        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
                /*
                 * If ->release runs before mmu_notifier_unregister it must be
                 * handled, as it's the only way for the driver to flush all
                 * existing sptes and stop the driver from establishing any more
                 * sptes before all the pages in the mm are freed.
                 */
                if (mn->ops->release)
                        mn->ops->release(mn, mm);
        srcu_read_unlock(&srcu, id);

        spin_lock(&mm->mmu_notifier_mm->lock);
        while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
                mn = hlist_entry(mm->mmu_notifier_mm->list.first,
                                 struct mmu_notifier,
                                 hlist);
                /*
                 * We arrived before mmu_notifier_unregister so
                 * mmu_notifier_unregister will do nothing other than to wait
                 * for ->release to finish and for mmu_notifier_unregister to
                 * return.
                 */
                hlist_del_init_rcu(&mn->hlist);
        }
        spin_unlock(&mm->mmu_notifier_mm->lock);

        /*
         * synchronize_srcu here prevents mmu_notifier_release from returning to
         * exit_mmap (which would proceed with freeing all pages in the mm)
         * until the ->release method returns, if it was invoked by
         * mmu_notifier_unregister.
         *
         * The mmu_notifier_mm can't go away from under us because one mm_count
         * is held by exit_mmap.
         */
        synchronize_srcu(&srcu);
}
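/*
 * Example: a sketch of what a secondary-MMU driver's ->release method has to
 * do, per the requirement above that all sptes are flushed and no new ones
 * are established before the mm's pages are freed. Illustrative only:
 * struct example_mmu, example_mmu_stop() and example_mmu_zap_all() are
 * hypothetical driver helpers, not part of this file.
 *
 *      struct example_mmu {
 *              struct mmu_notifier mn;
 *      };
 *
 *      static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
 *      {
 *              struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);
 *
 *              example_mmu_stop(emmu);         // stop establishing new sptes
 *              example_mmu_zap_all(emmu);      // drop every existing spte
 *      }
 */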
/*
 * If no young bitflag is supported by the hardware, ->clear_flush_young can
 * unmap the address and return 1 or 0 depending on whether the mapping
 * previously existed or not.
 */
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                     unsigned long address)
{
        struct mmu_notifier *mn;
        int young = 0, id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->clear_flush_young)
                        young |= mn->ops->clear_flush_young(mn, mm, address);
        }
        srcu_read_unlock(&srcu, id);

        return young;
}

int __mmu_notifier_test_young(struct mm_struct *mm,
                              unsigned long address)
{
        struct mmu_notifier *mn;
        int young = 0, id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->test_young) {
                        young = mn->ops->test_young(mn, mm, address);
                        if (young)
                                break;
                }
        }
        srcu_read_unlock(&srcu, id);

        return young;
}

void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
                               pte_t pte)
{
        struct mmu_notifier *mn;
        int id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->change_pte)
                        mn->ops->change_pte(mn, mm, address, pte);
        }
        srcu_read_unlock(&srcu, id);
}

void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                    unsigned long address)
{
        struct mmu_notifier *mn;
        int id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->invalidate_page)
                        mn->ops->invalidate_page(mn, mm, address);
        }
        srcu_read_unlock(&srcu, id);
}

void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                           unsigned long start, unsigned long end)
{
        struct mmu_notifier *mn;
        int id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->invalidate_range_start)
                        mn->ops->invalidate_range_start(mn, mm, start, end);
        }
        srcu_read_unlock(&srcu, id);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                         unsigned long start, unsigned long end)
{
        struct mmu_notifier *mn;
        int id;

        id = srcu_read_lock(&srcu);
        hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
                if (mn->ops->invalidate_range_end)
                        mn->ops->invalidate_range_end(mn, mm, start, end);
        }
        srcu_read_unlock(&srcu, id);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
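/*
 * Example: the two range helpers above call the driver's
 * ->invalidate_range_start/->invalidate_range_end pair around an
 * invalidation of [start, end). A hedged sketch of a common driver-side
 * pattern (hypothetical example_mmu fields and example_mmu_zap_range(),
 * not code from this file): stall new sptes for the range at start, zap
 * existing ones, and let faults proceed again at end.
 *
 *      static void example_invalidate_range_start(struct mmu_notifier *mn,
 *                                                 struct mm_struct *mm,
 *                                                 unsigned long start,
 *                                                 unsigned long end)
 *      {
 *              struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);
 *
 *              spin_lock(&emmu->lock);
 *              emmu->invalidate_count++;       // secondary faults must stall now
 *              example_mmu_zap_range(emmu, start, end);
 *              spin_unlock(&emmu->lock);
 *      }
 *
 *      static void example_invalidate_range_end(struct mmu_notifier *mn,
 *                                               struct mm_struct *mm,
 *                                               unsigned long start,
 *                                               unsigned long end)
 *      {
 *              struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);
 *
 *              spin_lock(&emmu->lock);
 *              emmu->invalidate_count--;       // faults may establish sptes again
 *              spin_unlock(&emmu->lock);
 *      }
 */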
static int do_mmu_notifier_register(struct mmu_notifier *mn,
                                    struct mm_struct *mm,
                                    int take_mmap_sem)
{
        struct mmu_notifier_mm *mmu_notifier_mm;
        int ret;

        BUG_ON(atomic_read(&mm->mm_users) <= 0);

        /*
         * Verify that mmu_notifier_init() has already run and the global
         * srcu is initialized.
         */
        BUG_ON(!srcu.per_cpu_ref);

        ret = -ENOMEM;
        mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
        if (unlikely(!mmu_notifier_mm))
                goto out;

        if (take_mmap_sem)
                down_write(&mm->mmap_sem);
        ret = mm_take_all_locks(mm);
        if (unlikely(ret))
                goto out_clean;

        if (!mm_has_notifiers(mm)) {
                INIT_HLIST_HEAD(&mmu_notifier_mm->list);
                spin_lock_init(&mmu_notifier_mm->lock);

                mm->mmu_notifier_mm = mmu_notifier_mm;
                mmu_notifier_mm = NULL;
        }
        atomic_inc(&mm->mm_count);

        /*
         * Serialize the update against mmu_notifier_unregister. A
         * side note: mmu_notifier_release can't run concurrently with
         * us because we hold the mm_users pin (either implicitly as
         * current->mm or explicitly with get_task_mm() or similar).
         * We can't race against any other mmu notifier method either
         * thanks to mm_take_all_locks().
         */
        spin_lock(&mm->mmu_notifier_mm->lock);
        hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
        spin_unlock(&mm->mmu_notifier_mm->lock);

        mm_drop_all_locks(mm);
out_clean:
        if (take_mmap_sem)
                up_write(&mm->mmap_sem);
        kfree(mmu_notifier_mm);
out:
        BUG_ON(atomic_read(&mm->mm_users) <= 0);
        return ret;
}

/*
 * Must not hold mmap_sem nor any other VM related lock when calling
 * this registration function. Must also ensure mm_users can't go down
 * to zero while this runs to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns. mmu_notifier_unregister must always be called to
 * unregister the notifier. mm_count is automatically pinned to allow
 * mmu_notifier_unregister to safely run at any time later, before or
 * after exit_mmap. ->release will always be called before exit_mmap
 * frees the pages. A registration sketch following this contract is
 * shown after __mmu_notifier_register below.
 */
int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
        return do_mmu_notifier_register(mn, mm, 1);
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);

/*
 * Same as mmu_notifier_register but here the caller must hold the
 * mmap_sem in write mode.
 */
int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
        return do_mmu_notifier_register(mn, mm, 0);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);
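/*
 * Example: registering a notifier against another task's mm, following the
 * contract documented above mmu_notifier_register(): pin mm_users first
 * (here with get_task_mm()), register, then drop the mm_users pin with
 * mmput(). Illustrative sketch only; example_attach(), struct example_mmu
 * and the example_* callbacks are hypothetical.
 *
 *      static const struct mmu_notifier_ops example_ops = {
 *              .release                = example_release,
 *              .invalidate_range_start = example_invalidate_range_start,
 *              .invalidate_range_end   = example_invalidate_range_end,
 *      };
 *
 *      static int example_attach(struct example_mmu *emmu, struct task_struct *task)
 *      {
 *              struct mm_struct *mm = get_task_mm(task);       // pin mm_users
 *              int ret;
 *
 *              if (!mm)
 *                      return -ESRCH;
 *              emmu->mn.ops = &example_ops;
 *              ret = mmu_notifier_register(&emmu->mn, mm);     // also pins mm_count
 *              mmput(mm);                                      // drop the mm_users pin
 *              return ret;
 *      }
 */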
/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
        kfree(mm->mmu_notifier_mm);
        mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with SRCU and against mmu_notifier_unregister
 * with the unregister lock + SRCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returns are we guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
        BUG_ON(atomic_read(&mm->mm_count) <= 0);

        if (!hlist_unhashed(&mn->hlist)) {
                /*
                 * SRCU here will force exit_mmap to wait for ->release to
                 * finish before freeing the pages.
                 */
                int id;

                id = srcu_read_lock(&srcu);
                /*
                 * exit_mmap will block in mmu_notifier_release to guarantee
                 * that ->release is called before freeing the pages.
                 */
                if (mn->ops->release)
                        mn->ops->release(mn, mm);
                srcu_read_unlock(&srcu, id);

                spin_lock(&mm->mmu_notifier_mm->lock);
                /*
                 * Cannot use list_del_rcu() since __mmu_notifier_release
                 * can delete it before we hold the lock.
                 */
                hlist_del_init_rcu(&mn->hlist);
                spin_unlock(&mm->mmu_notifier_mm->lock);
        }

        /*
         * Wait for any running method to finish, of course including
         * ->release if it was run by mmu_notifier_release instead of us.
         */
        synchronize_srcu(&srcu);

        BUG_ON(atomic_read(&mm->mm_count) <= 0);

        mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);

static int __init mmu_notifier_init(void)
{
        return init_srcu_struct(&srcu);
}
subsys_initcall(mmu_notifier_init);
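/*
 * Example: tearing down the registration from the sketch above. Illustrative
 * only; example_detach() and struct example_mmu are hypothetical.
 * mmu_notifier_unregister() waits for any running callback (including
 * ->release) to finish and drops the mm_count pin taken at registration
 * time, so the caller does not mmdrop() the mm itself.
 *
 *      static void example_detach(struct example_mmu *emmu, struct mm_struct *mm)
 *      {
 *              mmu_notifier_unregister(&emmu->mn, mm);
 *              // After this returns, no notifier method can still be
 *              // running against emmu->mn.
 *      }
 */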