/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section - memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
        ____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
        ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(struct page *page)
{
        return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
{
        struct mem_section *section = NULL;
        unsigned long array_size = SECTIONS_PER_ROOT *
                                   sizeof(struct mem_section);

        if (slab_is_available())
                section = kmalloc_node(array_size, GFP_KERNEL, nid);
        else
                section = alloc_bootmem_node(NODE_DATA(nid), array_size);

        if (section)
                memset(section, 0, array_size);

        return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
        static DEFINE_SPINLOCK(index_init_lock);
        unsigned long root = SECTION_NR_TO_ROOT(section_nr);
        struct mem_section *section;
        int ret = 0;

#ifdef NODE_NOT_IN_PAGE_FLAGS
        section_to_node_table[section_nr] = nid;
#endif

        if (mem_section[root])
                return -EEXIST;

        section = sparse_index_alloc(nid);
        /*
         * This lock keeps two different sections from
         * reallocating for the same index
         */
        spin_lock(&index_init_lock);

        if (mem_section[root]) {
                ret = -EEXIST;
                goto out;
        }

        mem_section[root] = section;
out:
        spin_unlock(&index_init_lock);
        return ret;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
        return 0;
}
#endif

/*
 * Although written for the SPARSEMEM_EXTREME case, this happens
 * to also work for the flat array case because
 * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
 */
int __section_nr(struct mem_section *ms)
{
        unsigned long root_nr;
        struct mem_section *root;

        for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
                root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
                if (!root)
                        continue;

                if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
                        break;
        }

        return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
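
/*
 * Illustrative sketch of the forward lookup that __section_nr() above
 * inverts: a section number splits into a root index and an offset
 * within that root, mirroring the root_nr * SECTIONS_PER_ROOT +
 * (ms - root) arithmetic used above.  The helper name is made up for
 * illustration only; the kernel's own lookup is __nr_to_section().
 */
static __attribute__((unused))
struct mem_section *sparse_example_nr_to_section(unsigned long section_nr)
{
        unsigned long root_nr = SECTION_NR_TO_ROOT(section_nr);
        struct mem_section *root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);

        /* root may be NULL if this root's array was never allocated */
        if (!root)
                return NULL;

        return root + (section_nr - root_nr * SECTIONS_PER_ROOT);
}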

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node.  This keeps us from having to use another data structure.  The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
        return (nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
        return (section->section_mem_map >> SECTION_NID_SHIFT);
}

/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
        unsigned long pfn;

        start &= PAGE_SECTION_MASK;
        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
                unsigned long section = pfn_to_section_nr(pfn);
                struct mem_section *ms;

                sparse_index_init(section, nid);

                ms = __nr_to_section(section);
                if (!ms->section_mem_map)
                        ms->section_mem_map = sparse_encode_early_nid(nid) |
                                                        SECTION_MARKED_PRESENT;
        }
}

/*
 * Only used by the i386 NUMA architectures, but relatively
 * generic code.
 */
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
                                            unsigned long end_pfn)
{
        unsigned long pfn;
        unsigned long nr_pages = 0;

        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                if (nid != early_pfn_to_nid(pfn))
                        continue;

                if (pfn_valid(pfn))
                        nr_pages += PAGES_PER_SECTION;
        }

        return nr_pages * sizeof(struct page);
}

/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
        return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
}

/*
 * We need this if we ever free the mem_maps.  While not implemented yet,
 * this function is included for parity with its sibling.
 */
static __attribute((unused))
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
        return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
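
/*
 * Illustrative sketch of the encoding above: because the stored value
 * is mem_map minus the section's first pfn, adding any pfn that falls
 * within the section back onto it yields that pfn's struct page.
 * sparse_decode_mem_map() is the special case pfn == section_nr_to_pfn(pnum).
 * The helper name is made up for illustration only.
 */
static __attribute__((unused))
struct page *sparse_example_pfn_to_page(unsigned long coded_mem_map,
                                        unsigned long pfn)
{
        /* coded_mem_map is assumed to come from sparse_encode_mem_map() */
        return ((struct page *)coded_mem_map) + pfn;
}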

static int __meminit sparse_init_one_section(struct mem_section *ms,
                unsigned long pnum, struct page *mem_map)
{
        if (!valid_section(ms))
                return -EINVAL;

        ms->section_mem_map &= ~SECTION_MAP_MASK;
        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);

        return 1;
}

__attribute__((weak)) __init
void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
        return NULL;
}

static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
        struct page *map;
        struct mem_section *ms = __nr_to_section(pnum);
        int nid = sparse_early_nid(ms);

        map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
        if (map)
                return map;

        map = alloc_bootmem_high_node(NODE_DATA(nid),
                        sizeof(struct page) * PAGES_PER_SECTION);
        if (map)
                return map;

        map = alloc_bootmem_node(NODE_DATA(nid),
                        sizeof(struct page) * PAGES_PER_SECTION);
        if (map)
                return map;

        printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
        ms->section_mem_map = 0;
        return NULL;
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
        unsigned long pnum;
        struct page *map;

        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                if (!valid_section_nr(pnum))
                        continue;

                map = sparse_early_mem_map_alloc(pnum);
                if (!map)
                        continue;
                sparse_init_one_section(__nr_to_section(pnum), pnum, map);
        }
}

#ifdef CONFIG_MEMORY_HOTPLUG
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
{
        struct page *page, *ret;
        unsigned long memmap_size = sizeof(struct page) * nr_pages;

        page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
        if (page)
                goto got_map_page;

        ret = vmalloc(memmap_size);
        if (ret)
                goto got_map_ptr;

        return NULL;
got_map_page:
        ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:
        memset(ret, 0, memmap_size);

        return ret;
}

static int vaddr_in_vmalloc_area(void *addr)
{
        if (addr >= (void *)VMALLOC_START &&
            addr < (void *)VMALLOC_END)
                return 1;
        return 0;
}

static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
        if (vaddr_in_vmalloc_area(memmap))
                vfree(memmap);
        else
                free_pages((unsigned long)memmap,
                           get_order(sizeof(struct page) * nr_pages));
}

/*
 * returns the number of sections whose mem_maps were properly
 * set.  If this is <=0, then that means that the passed-in
 * map was not consumed and must be freed.
 */
int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
                           int nr_pages)
{
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        struct pglist_data *pgdat = zone->zone_pgdat;
        struct mem_section *ms;
        struct page *memmap;
        unsigned long flags;
        int ret;

        /*
         * no locking for this, because it does its own
         * plus, it does a kmalloc
         */
        sparse_index_init(section_nr, pgdat->node_id);
        memmap = __kmalloc_section_memmap(nr_pages);

        pgdat_resize_lock(pgdat, &flags);

        ms = __pfn_to_section(start_pfn);
        if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
                ret = -EEXIST;
                goto out;
        }
        ms->section_mem_map |= SECTION_MARKED_PRESENT;

        ret = sparse_init_one_section(ms, section_nr, memmap);

out:
        pgdat_resize_unlock(pgdat, &flags);
        if (ret <= 0)
                __kfree_section_memmap(memmap, nr_pages);
        return ret;
}
#endif
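
/*
 * Illustrative boot-time usage sketch: present ranges are expected to
 * be recorded per node first, after which sparse_init() allocates a
 * mem_map for each present section.  The node id, pfn range and
 * function name below are made up for illustration only; in practice
 * architecture setup code makes these calls.
 */
static __attribute__((unused)) __init
void sparse_example_boot_setup(void)
{
        /* record that node 0 has memory in pfns [0, 0x100000) */
        memory_present(0, 0, 0x100000);

        /* allocate and wire up a mem_map for every section marked present */
        sparse_init();
}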