/*
 *  linux/mm/mmu_notifier.c
 *
 *  Copyright (C) 2008  Qumranet, Inc.
 *  Copyright (C) 2008  SGI
 *             Christoph Lameter <clameter@sgi.com>
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to RCU, and it serializes
 * against the other mmu notifiers with RCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
void __mmu_notifier_release(struct mm_struct *mm)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	/*
	 * RCU here will block mmu_notifier_unregister until
	 * ->release returns.
	 */
	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
		/*
		 * If ->release runs before mmu_notifier_unregister it
		 * must be handled, as it's the only way for the driver
		 * to flush all existing sptes and stop the driver
		 * from establishing any more sptes before all the
		 * pages in the mm are freed.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
	rcu_read_unlock();

	spin_lock(&mm->mmu_notifier_mm->lock);
	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
				 struct mmu_notifier,
				 hlist);
		/*
		 * We arrived before mmu_notifier_unregister, so
		 * mmu_notifier_unregister will do nothing other than
		 * wait for ->release to finish and then return.
		 */
		hlist_del_init_rcu(&mn->hlist);
	}
	spin_unlock(&mm->mmu_notifier_mm->lock);

	/*
	 * synchronize_rcu here prevents mmu_notifier_release from
	 * returning to exit_mmap (which would proceed freeing all pages
	 * in the mm) until the ->release method returns, if it was
	 * invoked by mmu_notifier_unregister.
	 *
	 * The mmu_notifier_mm can't go away from under us because one
	 * mm_count is held by exit_mmap.
	 */
	synchronize_rcu();
}
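
/*
 * Example (hypothetical, for illustration only, guarded out with #if 0):
 * a minimal sketch of how a driver that shadows user page tables in a
 * secondary MMU might implement ->release.  The names example_mmu,
 * example_mmu_ops and example_flush_all_sptes are invented for this
 * sketch; only the callback signatures are taken from the calls above.
 */
#if 0	/* illustrative sketch, not built */
struct example_mmu {
	struct mmu_notifier mn;		/* embedded notifier; see container_of */
	spinlock_t lock;		/* protects the two fields below */
	unsigned long invalidate_seq;	/* bumped on every invalidation */
	int invalidate_count;		/* >0 while a range invalidation runs */
	/* ... driver-private shadow page table (spte) state ... */
};

static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);

	/*
	 * Tear down every secondary-MMU mapping (spte) and stop new ones
	 * from being established: once ->release returns, exit_mmap is
	 * free to unpin and free the pages of this mm.
	 * example_flush_all_sptes is a hypothetical driver helper.
	 */
	example_flush_all_sptes(emmu);
}

static const struct mmu_notifier_ops example_mmu_ops = {
	.release = example_release,
};
#endif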

/*
 * If no young bitflag is supported by the hardware, ->clear_flush_young can
 * unmap the address and return 1 or 0 depending on whether the mapping
 * previously existed or not.
 */
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
				     unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;
	int young = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->clear_flush_young)
			young |= mn->ops->clear_flush_young(mn, mm, address);
	}
	rcu_read_unlock();

	return young;
}

int __mmu_notifier_test_young(struct mm_struct *mm,
			      unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;
	int young = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->test_young) {
			young = mn->ops->test_young(mn, mm, address);
			if (young)
				break;
		}
	}
	rcu_read_unlock();

	return young;
}

void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
			       pte_t pte)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->change_pte)
			mn->ops->change_pte(mn, mm, address, pte);
		/*
		 * Some drivers don't have change_pte,
		 * so we must call invalidate_page in that case.
		 */
		else if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_page(struct mm_struct *mm,
				    unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
					   unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_start)
			mn->ops->invalidate_range_start(mn, mm, start, end);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
					 unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_end)
			mn->ops->invalidate_range_end(mn, mm, start, end);
	}
	rcu_read_unlock();
}
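
/*
 * Example (hypothetical, for illustration only, guarded out with #if 0):
 * one common way a driver can pair ->invalidate_range_start and
 * ->invalidate_range_end, continuing the example_mmu sketch above.
 * Between the two calls the primary page tables may change, so the
 * sketch blocks new secondary mappings for the mm until _end runs.
 * example_flush_sptes_in_range is an invented helper; only the callback
 * signatures come from the multiplexers above.
 */
#if 0	/* illustrative sketch, not built */
static void example_invalidate_range_start(struct mmu_notifier *mn,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);

	spin_lock(&emmu->lock);
	emmu->invalidate_count++;	/* no new sptes until _end */
	emmu->invalidate_seq++;		/* force concurrent faults to retry */
	spin_unlock(&emmu->lock);

	example_flush_sptes_in_range(emmu, start, end);
}

static void example_invalidate_range_end(struct mmu_notifier *mn,
					 struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end)
{
	struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);

	spin_lock(&emmu->lock);
	emmu->invalidate_count--;	/* faults may establish sptes again */
	spin_unlock(&emmu->lock);
}
#endif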

static int do_mmu_notifier_register(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    int take_mmap_sem)
{
	struct mmu_notifier_mm *mmu_notifier_mm;
	int ret;

	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	ret = -ENOMEM;
	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
	if (unlikely(!mmu_notifier_mm))
		goto out;

	if (take_mmap_sem)
		down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_cleanup;

	if (!mm_has_notifiers(mm)) {
		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
		spin_lock_init(&mmu_notifier_mm->lock);
		mm->mmu_notifier_mm = mmu_notifier_mm;
		mmu_notifier_mm = NULL;
	}
	atomic_inc(&mm->mm_count);

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 */
	spin_lock(&mm->mmu_notifier_mm->lock);
	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	mm_drop_all_locks(mm);
out_cleanup:
	if (take_mmap_sem)
		up_write(&mm->mmap_sem);
	/* kfree() does nothing if mmu_notifier_mm is NULL */
	kfree(mmu_notifier_mm);
out:
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return ret;
}

/*
 * Must not hold mmap_sem nor any other VM related lock when calling
 * this registration function. Must also ensure mm_users can't go down
 * to zero while this runs to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns. mmu_notifier_unregister must always be called to
 * unregister the notifier. mm_count is automatically pinned to allow
 * mmu_notifier_unregister to safely run at any time later, before or
 * after exit_mmap. ->release will always be called before exit_mmap
 * frees the pages.
 */
int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 1);
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);

/*
 * Same as mmu_notifier_register but here the caller must hold the
 * mmap_sem in write mode.
 */
int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 0);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);
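
/*
 * Example (hypothetical, for illustration only, guarded out with #if 0):
 * registering against a foreign mm as documented above.  The caller pins
 * mm_users across the call, e.g. via get_task_mm(), and may drop that
 * pin right after registration since mmu_notifier_register takes its own
 * mm_count pin.  example_attach and example_mmu_ops are invented names
 * from the earlier sketch.
 */
#if 0	/* illustrative sketch, not built */
static int example_attach(struct example_mmu *emmu, struct task_struct *task)
{
	struct mm_struct *mm;
	int ret;

	mm = get_task_mm(task);	/* pins mm_users so exit_mmap can't run yet */
	if (!mm)
		return -ESRCH;

	emmu->mn.ops = &example_mmu_ops;
	ret = mmu_notifier_register(&emmu->mn, mm);

	/*
	 * The mm_users pin can be dropped now; the notifier stays valid
	 * until mmu_notifier_unregister thanks to the mm_count pin taken
	 * by mmu_notifier_register.
	 */
	mmput(mm);
	return ret;
}
#endif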

/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
	kfree(mm->mmu_notifier_mm);
	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with RCU and against mmu_notifier_unregister
 * with the unregister lock + RCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister has returned are we guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	if (!hlist_unhashed(&mn->hlist)) {
		/*
		 * RCU here will force exit_mmap to wait for ->release
		 * to finish before freeing the pages.
		 */
		rcu_read_lock();

		/*
		 * exit_mmap will block in mmu_notifier_release to
		 * guarantee ->release is called before freeing the
		 * pages.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		rcu_read_unlock();

		spin_lock(&mm->mmu_notifier_mm->lock);
		hlist_del_rcu(&mn->hlist);
		spin_unlock(&mm->mmu_notifier_mm->lock);
	}

	/*
	 * Wait for any running method to finish, of course including
	 * ->release if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_rcu();

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
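
/*
 * Example (hypothetical, for illustration only, guarded out with #if 0):
 * tearing down the earlier example_mmu sketch.  Because
 * mmu_notifier_unregister only returns after synchronize_rcu(), no
 * notifier method (including ->release) can still be running on the
 * notifier afterwards, so freeing the containing object is safe.
 * example_detach is an invented name.
 */
#if 0	/* illustrative sketch, not built */
static void example_detach(struct example_mmu *emmu, struct mm_struct *mm)
{
	/* also drops the mm_count pin taken at registration time */
	mmu_notifier_unregister(&emmu->mn, mm);

	/* no callback can reference emmu any more */
	kfree(emmu);
}
#endif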