/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
        struct list_head list;
        unsigned long start;
        unsigned long size;
};

static LIST_HEAD(mem_segs);

static void __ref *vmem_alloc_pages(unsigned int order)
{
        unsigned long size = PAGE_SIZE << order;

        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return (void *) memblock_alloc(size, size);
}

static inline p4d_t *vmem_p4d_alloc(void)
{
        p4d_t *p4d = NULL;

        p4d = vmem_alloc_pages(2);
        if (!p4d)
                return NULL;
        clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
        return p4d;
}

static inline pud_t *vmem_pud_alloc(void)
{
        pud_t *pud = NULL;

        pud = vmem_alloc_pages(2);
        if (!pud)
                return NULL;
        clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
        return pud;
}

pmd_t *vmem_pmd_alloc(void)
{
        pmd_t *pmd = NULL;

        pmd = vmem_alloc_pages(2);
        if (!pmd)
                return NULL;
        clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
        return pmd;
}

pte_t __ref *vmem_pte_alloc(void)
{
        unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
                pte = (pte_t *) memblock_alloc(size, size);
        if (!pte)
                return NULL;
        clear_table((unsigned long *) pte, _PAGE_INVALID, size);
        return pte;
}
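
/*
 * Editorial note on the allocation helpers above: s390 region and segment
 * tables have 2048 eight-byte entries and therefore span four pages, which
 * is why vmem_p4d/pud/pmd_alloc() request an order-2 allocation and clear
 * PAGE_SIZE * 4 bytes, while a page table holds PTRS_PER_PTE (256) entries
 * and fits into 2KB. Early in boot, before the slab allocator is available,
 * the memory is taken from memblock; later the regular page and page-table
 * allocators are used.
 */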

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size)
{
        unsigned long pgt_prot, sgt_prot, r3_prot;
        unsigned long pages4k, pages1m, pages2g;
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        pgt_prot = pgprot_val(PAGE_KERNEL);
        sgt_prot = pgprot_val(SEGMENT_KERNEL);
        r3_prot = pgprot_val(REGION3_KERNEL);
        if (!MACHINE_HAS_NX) {
                pgt_prot &= ~_PAGE_NOEXEC;
                sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
                r3_prot &= ~_REGION_ENTRY_NOEXEC;
        }
        pages4k = pages1m = pages2g = 0;
        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        p4_dir = vmem_p4d_alloc();
                        if (!p4_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, p4_dir);
                }
                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        pu_dir = vmem_pud_alloc();
                        if (!pu_dir)
                                goto out;
                        p4d_populate(&init_mm, p4_dir, pu_dir);
                }
                pu_dir = pud_offset(p4_dir, address);
                if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
                    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
                    !debug_pagealloc_enabled()) {
                        pud_val(*pu_dir) = address | r3_prot;
                        address += PUD_SIZE;
                        pages2g++;
                        continue;
                }
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_pmd_alloc();
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }
                pm_dir = pmd_offset(pu_dir, address);
                if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
                    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
                    !debug_pagealloc_enabled()) {
                        pmd_val(*pm_dir) = address | sgt_prot;
                        address += PMD_SIZE;
                        pages1m++;
                        continue;
                }
                if (pmd_none(*pm_dir)) {
                        pt_dir = vmem_pte_alloc();
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                }

                pt_dir = pte_offset_kernel(pm_dir, address);
                pte_val(*pt_dir) = address | pgt_prot;
                address += PAGE_SIZE;
                pages4k++;
        }
        ret = 0;
out:
        update_page_count(PG_DIRECT_MAP_4K, pages4k);
        update_page_count(PG_DIRECT_MAP_1M, pages1m);
        update_page_count(PG_DIRECT_MAP_2G, pages2g);
        return ret;
}
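
/*
 * Editorial note: vmem_add_mem() above maps a range with the largest frame
 * size the machine offers: 2GB region-third entries with EDAT2, 1MB segment
 * entries with EDAT1, and 4KB pages otherwise. Large frames are only used
 * when the address and the remaining length are suitably aligned and
 * debug_pagealloc is not active, since debug_pagealloc needs page-granular
 * mappings.
 */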

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        unsigned long pages4k, pages1m, pages2g;
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;

        pages4k = pages1m = pages2g = 0;
        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        address += PGDIR_SIZE;
                        continue;
                }
                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        address += P4D_SIZE;
                        continue;
                }
                pu_dir = pud_offset(p4_dir, address);
                if (pud_none(*pu_dir)) {
                        address += PUD_SIZE;
                        continue;
                }
                if (pud_large(*pu_dir)) {
                        pud_clear(pu_dir);
                        address += PUD_SIZE;
                        pages2g++;
                        continue;
                }
                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
                        address += PMD_SIZE;
                        continue;
                }
                if (pmd_large(*pm_dir)) {
                        pmd_clear(pm_dir);
                        address += PMD_SIZE;
                        pages1m++;
                        continue;
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
                pte_clear(&init_mm, address, pt_dir);
                address += PAGE_SIZE;
                pages4k++;
        }
        flush_tlb_kernel_range(start, end);
        update_page_count(PG_DIRECT_MAP_4K, -pages4k);
        update_page_count(PG_DIRECT_MAP_1M, -pages1m);
        update_page_count(PG_DIRECT_MAP_2G, -pages2g);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
        unsigned long pgt_prot, sgt_prot;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        pgt_prot = pgprot_val(PAGE_KERNEL);
        sgt_prot = pgprot_val(SEGMENT_KERNEL);
        if (!MACHINE_HAS_NX) {
                pgt_prot &= ~_PAGE_NOEXEC;
                sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
        }
        for (address = start; address < end;) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        p4_dir = vmem_p4d_alloc();
                        if (!p4_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, p4_dir);
                }

                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        pu_dir = vmem_pud_alloc();
                        if (!pu_dir)
                                goto out;
                        p4d_populate(&init_mm, p4_dir, pu_dir);
                }

                pu_dir = pud_offset(p4_dir, address);
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_pmd_alloc();
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }

                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
                        /* Use 1MB frames for vmemmap if available. We
                         * always use large frames even if they are only
                         * partially used.
                         * Otherwise we would also have to allocate page
                         * tables, since vmemmap_populate gets called for
                         * each section separately.
                         */
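                        /*
                         * Note that "address" may start in the middle of
                         * a 1MB frame here, so the EDAT1 path below
                         * advances to the next segment boundary with
                         * (address + PMD_SIZE) & PMD_MASK instead of a
                         * plain address += PMD_SIZE.
                         */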
                        if (MACHINE_HAS_EDAT1) {
                                void *new_page;

                                new_page = vmemmap_alloc_block(PMD_SIZE, node);
                                if (!new_page)
                                        goto out;
                                pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
                                address = (address + PMD_SIZE) & PMD_MASK;
                                continue;
                        }
                        pt_dir = vmem_pte_alloc();
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                } else if (pmd_large(*pm_dir)) {
                        address = (address + PMD_SIZE) & PMD_MASK;
                        continue;
                }

                pt_dir = pte_offset_kernel(pm_dir, address);
                if (pte_none(*pt_dir)) {
                        void *new_page;

                        new_page = vmemmap_alloc_block(PAGE_SIZE, node);
                        if (!new_page)
                                goto out;
                        pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
                }
                address += PAGE_SIZE;
        }
        ret = 0;
out:
        return ret;
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
        struct memory_segment *tmp;

        if (seg->start + seg->size > VMEM_MAX_PHYS ||
            seg->start + seg->size < seg->start)
                return -ERANGE;

        list_for_each_entry(tmp, &mem_segs, list) {
                if (seg->start >= tmp->start + tmp->size)
                        continue;
                if (seg->start + seg->size <= tmp->start)
                        continue;
                return -ENOSPC;
        }
        list_add(&seg->list, &mem_segs);
        return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
        list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
        remove_memory_segment(seg);
        vmem_remove_range(seg->start, seg->size);
}

int vmem_remove_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);

        ret = -ENOENT;
        list_for_each_entry(seg, &mem_segs, list) {
                if (seg->start == start && seg->size == size)
                        break;
        }

        if (seg->start != start || seg->size != size)
                goto out;

        ret = 0;
        __remove_shared_memory(seg);
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);
        ret = -ENOMEM;
        seg = kzalloc(sizeof(*seg), GFP_KERNEL);
        if (!seg)
                goto out;
        seg->start = start;
        seg->size = size;

        ret = insert_memory_segment(seg);
        if (ret)
                goto out_free;

        ret = vmem_add_mem(start, size);
        if (ret)
                goto out_remove;
        goto out;

out_remove:
        __remove_shared_memory(seg);
out_free:
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}
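
/*
 * Editorial note: vmem_add_mapping() and vmem_remove_mapping() are the
 * interface used at run time, e.g. by memory hotplug or DCSS handling, to
 * add or remove a physical range from the 1:1 mapping. Illustrative
 * (hypothetical) usage by a caller:
 *
 *      rc = vmem_add_mapping(start, size);
 *      if (rc)
 *              return rc;
 *      ...
 *      vmem_remove_mapping(start, size);
 *
 * The remove call must use the same (start, size) pair that was passed to
 * the add call; partial removals are not supported.
 */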

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
        struct memblock_region *reg;

        for_each_memblock(memory, reg)
                vmem_add_mem(reg->base, reg->size);
        __set_memory((unsigned long) _stext,
                     (_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory((unsigned long) _etext,
                     (_eshared - _etext) >> PAGE_SHIFT,
                     SET_MEMORY_RO);
        __set_memory((unsigned long) _sinittext,
                     (_einittext - _sinittext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        pr_info("Write protected kernel read-only data: %luk\n",
                (_eshared - _stext) >> 10);
}

/*
 * Convert memblock.memory to a memory segment list so there is a single
 * list that contains all memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
        struct memblock_region *reg;
        struct memory_segment *seg;

        mutex_lock(&vmem_mutex);
        for_each_memblock(memory, reg) {
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
                if (!seg)
                        panic("Out of memory...\n");
                seg->start = reg->base;
                seg->size = reg->size;
                insert_memory_segment(seg);
        }
        mutex_unlock(&vmem_mutex);
        return 0;
}

core_initcall(vmem_convert_memory_chunk);