/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return (void *) memblock_alloc(size, size);
}

static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
	return pud;
}

pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
	return pmd;
}

pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_INVALID, size);
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
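 * Uses 1M segment mappings (EDAT1) and 2G region mappings (EDAT2) where
 * available and where the range is suitably aligned and large enough;
 * otherwise the range is mapped with 4K page table entries.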
 */
static int vmem_add_mem(unsigned long start, unsigned long size)
{
	unsigned long pgt_prot, sgt_prot, r3_prot;
	unsigned long pages4k, pages1m, pages2g;
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	pgt_prot = pgprot_val(PAGE_KERNEL);
	sgt_prot = pgprot_val(SEGMENT_KERNEL);
	r3_prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX) {
		pgt_prot &= ~_PAGE_NOEXEC;
		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
		r3_prot &= ~_REGION_ENTRY_NOEXEC;
	}
	pages4k = pages1m = pages2g = 0;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}
		pu_dir = pud_offset(pg_dir, address);
		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
		    !debug_pagealloc_enabled()) {
			pud_val(*pu_dir) = address | r3_prot;
			address += PUD_SIZE;
			pages2g++;
			continue;
		}
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
		    !debug_pagealloc_enabled()) {
			pmd_val(*pm_dir) = address | sgt_prot;
			address += PMD_SIZE;
			pages1m++;
			continue;
		}
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_val(*pt_dir) = address | pgt_prot;
		address += PAGE_SIZE;
		pages4k++;
	}
	ret = 0;
out:
	update_page_count(PG_DIRECT_MAP_4K, pages4k);
	update_page_count(PG_DIRECT_MAP_1M, pages1m);
	update_page_count(PG_DIRECT_MAP_2G, pages2g);
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long pages4k, pages1m, pages2g;
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;

	pages4k = pages1m = pages2g = 0;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			address += PGDIR_SIZE;
			continue;
		}
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			address += PUD_SIZE;
			continue;
		}
		if (pud_large(*pu_dir)) {
			pud_clear(pu_dir);
			address += PUD_SIZE;
			pages2g++;
			continue;
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			address += PMD_SIZE;
			continue;
		}
		if (pmd_large(*pm_dir)) {
			pmd_clear(pm_dir);
			address += PMD_SIZE;
			pages1m++;
			continue;
		}
		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_clear(&init_mm, address, pt_dir);
		address += PAGE_SIZE;
		pages4k++;
	}
	flush_tlb_kernel_range(start, end);
	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
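 * Backing pages are allocated with vmemmap_alloc_block(); with EDAT1 the
 * backing is mapped with 1M segments, otherwise with 4K page table entries.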
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long pgt_prot, sgt_prot;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	pgt_prot = pgprot_val(PAGE_KERNEL);
	sgt_prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX) {
		pgt_prot &= ~_PAGE_NOEXEC;
		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
	}
	for (address = start; address < end;) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			/* Use 1MB frames for vmemmap if available. We always
			 * use large frames even if they are only partially
			 * used. Otherwise we would also have to allocate page
			 * tables, since vmemmap_populate gets called for each
			 * section separately. */
			if (MACHINE_HAS_EDAT1) {
				void *new_page;

				new_page = vmemmap_alloc_block(PMD_SIZE, node);
				if (!new_page)
					goto out;
				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
				address = (address + PMD_SIZE) & PMD_MASK;
				continue;
			}
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		} else if (pmd_large(*pm_dir)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		if (pte_none(*pt_dir)) {
			void *new_page;

			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
			if (!new_page)
				goto out;
			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
		}
		address += PAGE_SIZE;
	}
	ret = 0;
out:
	return ret;
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

	if (seg->start + seg->size > VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
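 * Must be called with vmem_mutex held.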
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

int vmem_remove_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size);
	if (ret)
		goto out_remove;
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map the whole physical memory into virtual memory (identity mapping).
 * Enough space is reserved in the vmalloc area for the vmemmap to allow
 * hot-plugging additional memory segments.
 */
void __init vmem_map_init(void)
{
	struct memblock_region *reg;

	for_each_memblock(memory, reg)
		vmem_add_mem(reg->base, reg->size);
	__set_memory((unsigned long) _stext,
		     (_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long) _etext,
		     (_eshared - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long) _sinittext,
		     (_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	pr_info("Write protected kernel read-only data: %luk\n",
		(_eshared - _stext) >> 10);
}

/*
 * Convert memblock.memory to a memory segment list so there is a single
 * list that contains all memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
	struct memblock_region *reg;
	struct memory_segment *seg;

	mutex_lock(&vmem_mutex);
	for_each_memblock(memory, reg) {
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = reg->base;
		seg->size = reg->size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);