/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return (void *) memblock_alloc(size, size);
}

static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
	return pud;
}

pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
	return pmd;
}

pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_INVALID, size);
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
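 * The range is mapped with the largest page size the hardware supports:
 * 2 GB region-third entries with EDAT2, 1 MB segment entries with EDAT1,
 * and 4 KB page table entries otherwise. Large mappings are skipped when
 * debug_pagealloc is enabled, since it needs to map and unmap individual
 * 4 KB pages.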
 */
static int vmem_add_mem(unsigned long start, unsigned long size)
{
	unsigned long pgt_prot, sgt_prot, r3_prot;
	unsigned long pages4k, pages1m, pages2g;
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	pgt_prot = pgprot_val(PAGE_KERNEL);
	sgt_prot = pgprot_val(SEGMENT_KERNEL);
	r3_prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX) {
		pgt_prot &= ~_PAGE_NOEXEC;
		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
		r3_prot &= ~_REGION_ENTRY_NOEXEC;
	}
	pages4k = pages1m = pages2g = 0;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}
		pu_dir = pud_offset(pg_dir, address);
		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
		    !debug_pagealloc_enabled()) {
			pud_val(*pu_dir) = address | r3_prot;
			address += PUD_SIZE;
			pages2g++;
			continue;
		}
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
		    !debug_pagealloc_enabled()) {
			pmd_val(*pm_dir) = address | sgt_prot;
			address += PMD_SIZE;
			pages1m++;
			continue;
		}
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_val(*pt_dir) = address | pgt_prot;
		address += PAGE_SIZE;
		pages4k++;
	}
	ret = 0;
out:
	update_page_count(PG_DIRECT_MAP_4K, pages4k);
	update_page_count(PG_DIRECT_MAP_1M, pages1m);
	update_page_count(PG_DIRECT_MAP_2G, pages2g);
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long pages4k, pages1m, pages2g;
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;

	pages4k = pages1m = pages2g = 0;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			address += PGDIR_SIZE;
			continue;
		}
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			address += PUD_SIZE;
			continue;
		}
		if (pud_large(*pu_dir)) {
			pud_clear(pu_dir);
			address += PUD_SIZE;
			pages2g++;
			continue;
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			address += PMD_SIZE;
			continue;
		}
		if (pmd_large(*pm_dir)) {
			pmd_clear(pm_dir);
			address += PMD_SIZE;
			pages1m++;
			continue;
		}
		pt_dir = pte_offset_kernel(pm_dir, address);
		pte_clear(&init_mm, address, pt_dir);
		address += PAGE_SIZE;
		pages4k++;
	}
	flush_tlb_kernel_range(start, end);
	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
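 * This allocates the backing pages for the struct page array (the virtual
 * mem_map) covering [start, end) and maps them, preferring 1 MB frames
 * when EDAT1 is available.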
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long pgt_prot, sgt_prot;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	int ret = -ENOMEM;

	pgt_prot = pgprot_val(PAGE_KERNEL);
	sgt_prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX) {
		pgt_prot &= ~_PAGE_NOEXEC;
		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
	}
	for (address = start; address < end;) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			/* Use 1MB frames for vmemmap if available. We always
			 * use large frames even if they are only partially
			 * used.
			 * Otherwise we would also need page tables, since
			 * vmemmap_populate gets called for each section
			 * separately. */
			if (MACHINE_HAS_EDAT1) {
				void *new_page;

				new_page = vmemmap_alloc_block(PMD_SIZE, node);
				if (!new_page)
					goto out;
				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
				address = (address + PMD_SIZE) & PMD_MASK;
				continue;
			}
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		} else if (pmd_large(*pm_dir)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		if (pte_none(*pt_dir)) {
			void *new_page;

			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
			if (!new_page)
				goto out;
			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
		}
		address += PAGE_SIZE;
	}
	ret = 0;
out:
	return ret;
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

	if (seg->start + seg->size > VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
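 * The caller must hold vmem_mutex.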
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

int vmem_remove_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size);
	if (ret)
		goto out_remove;
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	struct memblock_region *reg;

	for_each_memblock(memory, reg)
		vmem_add_mem(reg->base, reg->size);
	__set_memory((unsigned long) _stext,
		     (_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long) _etext,
		     (_eshared - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long) _sinittext,
		     (_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	pr_info("Write protected kernel read-only data: %luk\n",
		(_eshared - _stext) >> 10);
}

/*
 * Convert memblock.memory to a memory segment list so there is a single
 * list that contains all memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
	struct memblock_region *reg;
	struct memory_segment *seg;

	mutex_lock(&vmem_mutex);
	for_each_memblock(memory, reg) {
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = reg->base;
		seg->size = reg->size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);