/*
 *  linux/mm/mmu_notifier.c
 *
 *  Copyright (C) 2008  Qumranet, Inc.
 *  Copyright (C) 2008  SGI
 *             Christoph Lameter <clameter@sgi.com>
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * This function can't run concurrently with mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel, despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to RCU, and it serializes
 * against the other mmu notifiers with RCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
void __mmu_notifier_release(struct mm_struct *mm)
{
	struct mmu_notifier *mn;

	spin_lock(&mm->mmu_notifier_mm->lock);
	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
				 struct mmu_notifier,
				 hlist);
		/*
		 * We arrived before mmu_notifier_unregister, so
		 * mmu_notifier_unregister will do nothing other than
		 * wait for ->release to finish and then return.
		 */
		hlist_del_init_rcu(&mn->hlist);
		/*
		 * RCU here will block mmu_notifier_unregister until
		 * ->release returns.
		 */
		rcu_read_lock();
		spin_unlock(&mm->mmu_notifier_mm->lock);
		/*
		 * If ->release runs before mmu_notifier_unregister, it
		 * must be handled, as it's the only way for the driver
		 * to flush all existing sptes and stop the driver
		 * from establishing any more sptes before all the
		 * pages in the mm are freed.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		rcu_read_unlock();
		spin_lock(&mm->mmu_notifier_mm->lock);
	}
	spin_unlock(&mm->mmu_notifier_mm->lock);

	/*
	 * synchronize_rcu here prevents mmu_notifier_release from
	 * returning to exit_mmap (which would proceed to free all pages
	 * in the mm) until the ->release method returns, if it was
	 * invoked by mmu_notifier_unregister.
	 *
	 * The mmu_notifier_mm can't go away from under us because one
	 * mm_count pin is held by exit_mmap.
	 */
	synchronize_rcu();
}

/*
 * If no young bitflag is supported by the hardware, ->clear_flush_young
 * can unmap the address and return 1 or 0 depending on whether the
 * mapping previously existed or not.
 */
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
				     unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;
	int young = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->clear_flush_young)
			young |= mn->ops->clear_flush_young(mn, mm, address);
	}
	rcu_read_unlock();

	return young;
}
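
/*
 * Illustrative sketch, not part of the original file and not built: one
 * way a hypothetical driver whose secondary MMU lacks a hardware young
 * bit could implement ->clear_flush_young as described above, by tearing
 * down the secondary mapping and reporting whether one existed.  The
 * example_* names below are assumptions for illustration only.
 */
#if 0
/* Hypothetical driver helper: drop the secondary-MMU mapping ("spte")
 * covering @address and return true if one was present. */
static bool example_unmap_spte(struct mmu_notifier *mn, unsigned long address);

static int example_clear_flush_young(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long address)
{
	/*
	 * With no hardware accessed bit, treat "a secondary mapping
	 * exists" as "the page was young": unmap it so the next access
	 * re-faults (and hence re-ages the page), and report 1 only if
	 * a mapping actually existed.
	 */
	return example_unmap_spte(mn, address) ? 1 : 0;
}
#endif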

int __mmu_notifier_test_young(struct mm_struct *mm,
			      unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;
	int young = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->test_young) {
			young = mn->ops->test_young(mn, mm, address);
			if (young)
				break;
		}
	}
	rcu_read_unlock();

	return young;
}

void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
			       pte_t pte)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->change_pte)
			mn->ops->change_pte(mn, mm, address, pte);
		/*
		 * Some drivers don't have change_pte,
		 * so we must call invalidate_page in that case.
		 */
		else if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_page(struct mm_struct *mm,
				    unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
					   unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_start)
			mn->ops->invalidate_range_start(mn, mm, start, end);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
					 unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_end)
			mn->ops->invalidate_range_end(mn, mm, start, end);
	}
	rcu_read_unlock();
}
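
/*
 * Illustrative sketch, not part of the original file and not built: the
 * usual driver-side pairing for the two range callbacks invoked above.
 * ->invalidate_range_start drops the secondary mappings in the range and
 * blocks new ones from being established until the matching
 * ->invalidate_range_end runs.  struct example_dev, its fields and the
 * example_* helpers are assumptions for illustration only.
 */
#if 0
struct example_dev {
	struct mmu_notifier	mn;
	atomic_t		invalidate_depth;	/* nested start/end pairs */
};

/* Hypothetical driver helper: drop all sptes in [start, end). */
static void example_flush_sptes(struct example_dev *dev,
				unsigned long start, unsigned long end);

static void example_invalidate_range_start(struct mmu_notifier *mn,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	struct example_dev *dev = container_of(mn, struct example_dev, mn);

	/* Stop the fault path from creating new sptes in the range ... */
	atomic_inc(&dev->invalidate_depth);
	/* ... then tear down whatever is currently mapped there. */
	example_flush_sptes(dev, start, end);
}

static void example_invalidate_range_end(struct mmu_notifier *mn,
					 struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end)
{
	struct example_dev *dev = container_of(mn, struct example_dev, mn);

	/* The primary page tables are consistent again; faults may
	 * re-establish sptes from now on. */
	atomic_dec(&dev->invalidate_depth);
}
#endif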

static int do_mmu_notifier_register(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    int take_mmap_sem)
{
	struct mmu_notifier_mm *mmu_notifier_mm;
	int ret;

	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	ret = -ENOMEM;
	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
	if (unlikely(!mmu_notifier_mm))
		goto out;

	if (take_mmap_sem)
		down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_cleanup;

	if (!mm_has_notifiers(mm)) {
		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
		spin_lock_init(&mmu_notifier_mm->lock);
		mm->mmu_notifier_mm = mmu_notifier_mm;
		mmu_notifier_mm = NULL;
	}
	atomic_inc(&mm->mm_count);

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either,
	 * thanks to mm_take_all_locks().
	 */
	spin_lock(&mm->mmu_notifier_mm->lock);
	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	mm_drop_all_locks(mm);
out_cleanup:
	if (take_mmap_sem)
		up_write(&mm->mmap_sem);
	/* kfree() does nothing if mmu_notifier_mm is NULL */
	kfree(mmu_notifier_mm);
out:
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return ret;
}

/*
 * Must not hold mmap_sem nor any other VM related lock when calling
 * this registration function. Must also ensure mm_users can't go down
 * to zero while this runs, to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns. mmu_notifier_unregister must always be called to
 * unregister the notifier. mm_count is automatically pinned to allow
 * mmu_notifier_unregister to safely run at any time later, before or
 * after exit_mmap. ->release will always be called before exit_mmap
 * frees the pages.
 */
int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 1);
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);

/*
 * Same as mmu_notifier_register but here the caller must hold the
 * mmap_sem in write mode.
 */
int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 0);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);
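
/*
 * Illustrative sketch, not part of the original file and not built: a
 * registration path following the rules documented above for
 * mmu_notifier_register().  The mm is pinned with get_task_mm() so
 * mm_users can't reach zero during registration, no VM lock is held
 * across the call, and the mm_users pin is dropped with mmput() once
 * registration returns (the notifier stays usable thanks to the
 * mm_count pin taken by the registration).  example_attach() and the
 * ops it wires up are assumptions for illustration, reusing the
 * hypothetical helpers sketched earlier in this file.
 */
#if 0
static const struct mmu_notifier_ops example_mmu_notifier_ops = {
	.clear_flush_young	= example_clear_flush_young,
	.invalidate_range_start	= example_invalidate_range_start,
	.invalidate_range_end	= example_invalidate_range_end,
};

static int example_attach(struct example_dev *dev, struct task_struct *task)
{
	struct mm_struct *mm;
	int ret;

	mm = get_task_mm(task);		/* keep mm_users > 0 while registering */
	if (!mm)
		return -ESRCH;

	dev->mn.ops = &example_mmu_notifier_ops;
	atomic_set(&dev->invalidate_depth, 0);

	/* No mmap_sem or other VM lock may be held here. */
	ret = mmu_notifier_register(&dev->mn, mm);

	mmput(mm);			/* the mm_users pin is only needed above */
	return ret;
}
#endif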

/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
	kfree(mm->mmu_notifier_mm);
	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with RCU and against mmu_notifier_unregister
 * with the unregister lock + RCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returns are we guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	spin_lock(&mm->mmu_notifier_mm->lock);
	if (!hlist_unhashed(&mn->hlist)) {
		hlist_del_rcu(&mn->hlist);

		/*
		 * RCU here will force exit_mmap to wait for ->release
		 * to finish before freeing the pages.
		 */
		rcu_read_lock();
		spin_unlock(&mm->mmu_notifier_mm->lock);
		/*
		 * exit_mmap will block in mmu_notifier_release to
		 * guarantee ->release is called before freeing the
		 * pages.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		rcu_read_unlock();
	} else
		spin_unlock(&mm->mmu_notifier_mm->lock);

	/*
	 * Wait for any running method to finish, of course including
	 * ->release if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_rcu();

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);