/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void show_mem(void)
{
        int total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;
        unsigned long flags;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_online_pgdat(pgdat) {
                pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();
                        page = pgdat_page_nr(pgdat, i);
                        total++;
                        if (PageHighMem(page))
                                highmem++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
                pgdat_resize_unlock(pgdat, &flags);
        }
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
        printk(KERN_INFO "%d reserved pages\n", reserved);
        printk(KERN_INFO "%d pages shared\n", shared);
        printk(KERN_INFO "%d pages swap cached\n", cached);

        printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
        printk(KERN_INFO "%lu pages writeback\n",
                global_page_state(NR_WRITEBACK));
        printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
        printk(KERN_INFO "%lu pages slab\n",
                global_page_state(NR_SLAB_RECLAIMABLE) +
                global_page_state(NR_SLAB_UNRECLAIMABLE));
        printk(KERN_INFO "%lu pages pagetables\n",
                global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        if (pgprot_val(flags))
                set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
        else
                pte_clear(&init_mm, vaddr, pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
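/*
 * Usage sketch: the usual way into set_pte_pfn() is the fixmap API from
 * <asm/fixmap.h>.  set_fixmap(idx, phys) expands (roughly) to
 * __set_fixmap(idx, phys, PAGE_KERNEL), defined below, which turns the
 * slot index into a fixed virtual address via __fix_to_virt() and ends
 * up calling
 *
 *      set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
 *
 * Passing empty protection flags (clear_fixmap()) removes the mapping
 * instead.  Callers rely on the page tables covering the fixmap region
 * having been instantiated at boot; the pgd/pud/pmd BUG() checks above
 * enforce that assumption.
 */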
/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        if (vaddr & (PMD_SIZE-1)) {             /* vaddr is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
                return; /* BUG(); */
        }
        if (pfn & (PTRS_PER_PTE-1)) {           /* pfn is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
                return; /* BUG(); */
        }
        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
                return; /* BUG(); */
        }
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        set_pmd(pmd, pfn_pmd(pfn, flags));
        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

static int fixmaps;
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
        fixmaps++;
}

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve - size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void reserve_top_address(unsigned long reserve)
{
        BUG_ON(fixmaps > 0);
        printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
               (int)-reserve);
        __FIXADDR_TOP = -reserve - PAGE_SIZE;
        __VMALLOC_RESERVE += reserve;
}

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
        return pte;
}
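/*
 * Consumption sketch: generic mm code (not this file) plugs the pages
 * returned by pte_alloc_one() into a pmd with pmd_populate().  With
 * CONFIG_HIGHPTE the pte page may live in highmem and is never
 * dereferenced directly; users go through pte_offset_map(), which on
 * i386 kmap_atomic()s the pte page:
 *
 *      pte_t *pte = pte_offset_map(pmd, address);
 *      ...read or update the entry...
 *      pte_unmap(pte);
 *
 * __GFP_ZERO guarantees every entry of a fresh page table is pte_none(),
 * and __GFP_REPEAT asks the allocator to try hard before failing, since
 * callers treat allocation failure as -ENOMEM.
 */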
/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */
static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_del(&page->lru);
}

#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
        unsigned long flags;

        /* !PAE, no pagetable sharing */
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

        spin_lock_irqsave(&pgd_lock, flags);

        /* must happen under lock */
        clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                        swapper_pg_dir + USER_PTRS_PER_PGD,
                        KERNEL_PGD_PTRS);
        paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
                                __pa(swapper_pg_dir) >> PAGE_SHIFT,
                                USER_PTRS_PER_PGD,
                                KERNEL_PGD_PTRS);
        pgd_list_add(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}
#else  /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
        /* PAE, kernel PMD may be shared */

        if (SHARED_KERNEL_PMD) {
                clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                                swapper_pg_dir + USER_PTRS_PER_PGD,
                                KERNEL_PGD_PTRS);
        } else {
                unsigned long flags;

                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
                spin_lock_irqsave(&pgd_lock, flags);
                pgd_list_add(pgd);
                spin_unlock_irqrestore(&pgd_lock, flags);
        }
}
#endif  /* PTRS_PER_PMD */

static void pgd_dtor(void *pgd)
{
        unsigned long flags; /* can be called from interrupt context */

        if (SHARED_KERNEL_PMD)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

#define UNSHARED_PTRS_PER_PGD \
        (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)

#ifdef CONFIG_X86_PAE
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(pgd_t *pgdp)
{
        int i;

        for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
                pgd_t pgd = pgdp[i];

                if (pgd_val(pgd) != 0) {
                        pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

                        pgdp[i] = native_make_pgd(0);

                        paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
                        pmd_free(pmd);
                }
        }
}
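/*
 * Geometry note (PAE, assuming the usual 3G/1G split): PTRS_PER_PGD is 4
 * and each pgd entry points at one 4 KiB pmd page covering 1 GiB of
 * address space (512 entries of 2 MiB each).  With SHARED_KERNEL_PMD
 * only the USER_PTRS_PER_PGD == 3 user pmds are per-process, so the
 * prepopulation done by pgd_prepopulate_pmd() below costs at most three
 * extra pages per mm; without sharing it is four, with the kernel pmd
 * contents copied from swapper_pg_dir.
 */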
/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update. Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
        pud_t *pud;
        unsigned long addr;
        int i;

        pud = pud_offset(pgd, 0);
        for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
             i++, pud++, addr += PUD_SIZE) {
                pmd_t *pmd = pmd_alloc_one(mm, addr);

                if (!pmd) {
                        pgd_mop_up_pmds(pgd);
                        return 0;
                }

                if (i >= USER_PTRS_PER_PGD)
                        memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
                               sizeof(pmd_t) * PTRS_PER_PMD);

                pud_populate(mm, pud, pmd);
        }

        return 1;
}
#else  /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
        return 1;
}

static void pgd_mop_up_pmds(pgd_t *pgd)
{
}
#endif  /* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
        pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);

        mm->pgd = pgd;                  /* so that alloc_pd can use it */

        if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
                quicklist_free(0, pgd_dtor, pgd);
                pgd = NULL;
        }

        return pgd;
}

void pgd_free(pgd_t *pgd)
{
        pgd_mop_up_pmds(pgd);
        quicklist_free(0, pgd_dtor, pgd);
}

/* Trim surplus pgd pages cached on the quicklist, running pgd_dtor()
   on each page handed back to the page allocator. */
void check_pgt_cache(void)
{
        quicklist_trim(0, pgd_dtor, 25, 16);
}

/* Release a pte page: tell the paravirt backend it is no longer a
   pagetable, then queue it on the mmu_gather so it is freed only after
   the TLB has been flushed. */
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
        paravirt_release_pt(page_to_pfn(pte));
        tlb_remove_page(tlb, pte);
}

#ifdef CONFIG_X86_PAE

void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
        /* This is called just after the pmd has been detached from
           the pgd, which requires a full tlb flush to be recognized
           by the CPU. Rather than incurring multiple tlb flushes
           while the address space is being pulled down, make the tlb
           gathering machinery do a full flush when we're done. */
        tlb->fullmm = 1;

        paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
        tlb_remove_page(tlb, virt_to_page(pmd));
}

#endif
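/*
 * Lifecycle sketch (the callers live outside this file): a new address
 * space obtains its page directory from pgd_alloc() and releases it
 * through pgd_free(), roughly
 *
 *      mm->pgd = pgd_alloc(mm);        // mm_alloc_pgd() in kernel/fork.c
 *      ...
 *      pgd_free(mm->pgd);              // mm_free_pgd() when the mm is torn down
 *
 * The quicklist keeps recently freed pgds around with pgd_ctor() already
 * applied, so a reused pgd still carries the kernel mappings cloned from
 * swapper_pg_dir; check_pgt_cache() periodically hands surplus cached
 * pgds back to the page allocator via pgd_dtor().
 */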