xref: /openbmc/linux/arch/x86/mm/pgtable_32.c (revision 643d1f7f)
/*
 *  linux/arch/x86/mm/pgtable_32.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void show_mem(void)
{
	int total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;
	struct page *page;
	pg_data_t *pgdat;
	unsigned long i;
	unsigned long flags;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	for_each_online_pgdat(pgdat) {
		pgdat_resize_lock(pgdat, &flags);
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();
			page = pgdat_page_nr(pgdat, i);
			total++;
			if (PageHighMem(page))
				highmem++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
		pgdat_resize_unlock(pgdat, &flags);
	}
	printk(KERN_INFO "%d pages of RAM\n", total);
	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
	printk(KERN_INFO "%d reserved pages\n", reserved);
	printk(KERN_INFO "%d pages shared\n", shared);
	printk(KERN_INFO "%d pages swap cached\n", cached);

	printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
	printk(KERN_INFO "%lu pages writeback\n",
					global_page_state(NR_WRITEBACK));
	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
	printk(KERN_INFO "%lu pages slab\n",
		global_page_state(NR_SLAB_RECLAIMABLE) +
		global_page_state(NR_SLAB_UNRECLAIMABLE));
	printk(KERN_INFO "%lu pages pagetables\n",
					global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	if (pgprot_val(flags))
		set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
	else
		pte_clear(&init_mm, vaddr, pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}
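
/*
 * Illustrative sketch (not part of the original file): set_pte_pfn() is the
 * low-level helper behind __set_fixmap() further down.  A typical use is to
 * wire one kernel virtual page to a physical frame, roughly:
 *
 *	set_pte_pfn(vaddr, phys >> PAGE_SHIFT, PAGE_KERNEL);
 *
 * where "vaddr" and "phys" are hypothetical page-aligned values; passing an
 * empty pgprot (pgprot_val() == 0) clears the mapping instead of setting it.
 */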

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
		return; /* BUG(); */
	}
	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
		return; /* BUG(); */
	}
	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
		return; /* BUG(); */
	}
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	set_pmd(pmd, pfn_pmd(pfn, flags));
	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}
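
/*
 * Illustrative sketch (assumption, not from the original file): with PAE,
 * PMD_SIZE is 2MB and PTRS_PER_PTE is 512, so a valid call maps one 2MB
 * physical range at a 2MB-aligned virtual address, roughly:
 *
 *	set_pmd_pfn(vaddr,			// PMD_SIZE-aligned
 *		    phys >> PAGE_SHIFT,		// PTRS_PER_PTE-aligned pfn
 *		    PAGE_KERNEL_LARGE);		// large-page protections
 *
 * "vaddr" and "phys" are hypothetical values; something like
 * PAGE_KERNEL_LARGE (PAGE_KERNEL plus _PAGE_PSE) is the expected pgprot.
 */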

static int fixmaps;
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);

void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
	fixmaps++;
}
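
/*
 * Illustrative sketch (assumption, not from the original file): callers
 * normally use the wrappers from <asm/fixmap.h> rather than __set_fixmap()
 * directly, roughly:
 *
 *	set_fixmap(idx, phys);		// maps with PAGE_KERNEL
 *	set_fixmap_nocache(idx, phys);	// maps with PAGE_KERNEL_NOCACHE
 *	clear_fixmap(idx);		// clears the slot again
 *
 * "idx" is a fixed_addresses slot and "phys" a physical address; the
 * wrappers only choose the pgprot and pass everything else through.
 */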

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void reserve_top_address(unsigned long reserve)
{
	BUG_ON(fixmaps > 0);
	printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
	       (int)-reserve);
	__FIXADDR_TOP = -reserve - PAGE_SIZE;
	__VMALLOC_RESERVE += reserve;
}
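
/*
 * Illustrative sketch (assumption, not from the original file): a paravirt
 * guest calls this very early in boot, before the first __set_fixmap(), to
 * keep the top of the address space for its hypervisor, e.g.:
 *
 *	reserve_top_address(HYPERVISOR_HOLE_SIZE);	// hypothetical size
 *
 * After the call the fixmap and vmalloc areas are laid out below the
 * reserved hole; the BUG_ON(fixmaps > 0) above enforces the ordering.
 */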

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

#ifdef CONFIG_HIGHPTE
	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
	return pte;
}
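
/*
 * Illustrative sketch (assumption, not from the original file): with
 * CONFIG_HIGHPTE the pte page may live in highmem, so users cannot keep a
 * long-lived kernel pointer to it and must map it temporarily, roughly:
 *
 *	pte_t *pte = pte_offset_map(pmd, address);	// kmap_atomic inside
 *	... examine or update *pte under the page table lock ...
 *	pte_unmap(pte);
 *
 * Without CONFIG_HIGHPTE, pte_offset_map()/pte_unmap() reduce to plain
 * lowmem pointer arithmetic.
 */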

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */
static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}
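
/*
 * Illustrative sketch (assumption, mirroring how pageattr.c uses this list):
 * a change to a kernel pagetable entry is propagated to every cached pgd by
 * walking pgd_list under pgd_lock, roughly:
 *
 *	struct page *page;
 *
 *	list_for_each_entry(page, &pgd_list, lru) {
 *		pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(address);
 *		... update the entry covering "address" in this pgd ...
 *	}
 */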

#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
	unsigned long flags;

	/* !PAE, no pagetable sharing */
	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

	spin_lock_irqsave(&pgd_lock, flags);

	/* must happen under lock */
	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
			swapper_pg_dir + USER_PTRS_PER_PGD,
			KERNEL_PGD_PTRS);
	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
				__pa(swapper_pg_dir) >> PAGE_SHIFT,
				USER_PTRS_PER_PGD,
				KERNEL_PGD_PTRS);
	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}
#else  /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
	/* PAE, kernel PMD may be shared */

	if (SHARED_KERNEL_PMD) {
		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
				swapper_pg_dir + USER_PTRS_PER_PGD,
				KERNEL_PGD_PTRS);
	} else {
		unsigned long flags;

		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
		spin_lock_irqsave(&pgd_lock, flags);
		pgd_list_add(pgd);
		spin_unlock_irqrestore(&pgd_lock, flags);
	}
}
#endif	/* PTRS_PER_PMD */

static void pgd_dtor(void *pgd)
{
	unsigned long flags; /* can be called from interrupt context */

	if (SHARED_KERNEL_PMD)
		return;

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

#define UNSHARED_PTRS_PER_PGD				\
	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
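
/*
 * Worked example (assumption, based on the default 3G/1G split): non-PAE has
 * PTRS_PER_PGD = 1024 with USER_PTRS_PER_PGD = 768; PAE has PTRS_PER_PGD = 4
 * with USER_PTRS_PER_PGD = 3.  So with a shared kernel pmd only the user
 * entries (768 or 3) need per-pgd maintenance; otherwise all of them do.
 */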

#ifdef CONFIG_X86_PAE
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(pgd_t *pgdp)
{
	int i;

	for (i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(pmd);
		}
	}
}

/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update.  Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
	pud_t *pud;
	unsigned long addr;
	int i;

	pud = pud_offset(pgd, 0);
	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
	     i++, pud++, addr += PUD_SIZE) {
		pmd_t *pmd = pmd_alloc_one(mm, addr);

		if (!pmd) {
			pgd_mop_up_pmds(pgd);
			return 0;
		}

		if (i >= USER_PTRS_PER_PGD)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}

	return 1;
}
#else  /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
	return 1;
}

static void pgd_mop_up_pmds(pgd_t *pgd)
{
}
#endif	/* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);

	mm->pgd = pgd;		/* so that alloc_pd can use it */

	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
		quicklist_free(0, pgd_dtor, pgd);
		pgd = NULL;
	}

	return pgd;
}

/* Free a pgd, tearing down any preallocated pmds first. */
void pgd_free(pgd_t *pgd)
{
	pgd_mop_up_pmds(pgd);
	quicklist_free(0, pgd_dtor, pgd);
}

/* Periodically trim the per-CPU quicklist of cached pgd pages. */
void check_pgt_cache(void)
{
	quicklist_trim(0, pgd_dtor, 25, 16);
}
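
/*
 * Illustrative note (assumption about the quicklist API of this era):
 * quicklist_trim(0, pgd_dtor, 25, 16) asks quicklist 0 to keep roughly 25
 * pages cached per CPU and to free at most 16 surplus pgd pages per call,
 * running pgd_dtor() on each page it hands back.  check_pgt_cache() is
 * expected to be called periodically, e.g. from the idle loop:
 *
 *	check_pgt_cache();	// trims the per-CPU pgd quicklist
 */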

/* Release a pte page: notify paravirt, then hand it to the tlb gather. */
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	paravirt_release_pt(page_to_pfn(pte));
	tlb_remove_page(tlb, pte);
}

#ifdef CONFIG_X86_PAE

void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	/*
	 * This is called just after the pmd has been detached from
	 * the pgd, which requires a full tlb flush to be recognized
	 * by the CPU.  Rather than incurring multiple tlb flushes
	 * while the address space is being pulled down, make the tlb
	 * gathering machinery do a full flush when we're done.
	 */
	tlb->fullmm = 1;

	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
	tlb_remove_page(tlb, virt_to_page(pmd));
}

#endif