1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Author: Xiang Gao <gaoxiang@loongson.cn> 4 * Huacai Chen <chenhuacai@loongson.cn> 5 * 6 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited 7 */ 8 #include <linux/init.h> 9 #include <linux/kernel.h> 10 #include <linux/mm.h> 11 #include <linux/mmzone.h> 12 #include <linux/export.h> 13 #include <linux/nodemask.h> 14 #include <linux/swap.h> 15 #include <linux/memblock.h> 16 #include <linux/pfn.h> 17 #include <linux/acpi.h> 18 #include <linux/efi.h> 19 #include <linux/irq.h> 20 #include <linux/pci.h> 21 #include <asm/bootinfo.h> 22 #include <asm/loongson.h> 23 #include <asm/numa.h> 24 #include <asm/page.h> 25 #include <asm/pgalloc.h> 26 #include <asm/sections.h> 27 #include <asm/time.h> 28 29 int numa_off; 30 struct pglist_data *node_data[MAX_NUMNODES]; 31 unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES]; 32 33 EXPORT_SYMBOL(node_data); 34 EXPORT_SYMBOL(node_distances); 35 36 static struct numa_meminfo numa_meminfo; 37 cpumask_t cpus_on_node[MAX_NUMNODES]; 38 cpumask_t phys_cpus_on_node[MAX_NUMNODES]; 39 EXPORT_SYMBOL(cpus_on_node); 40 41 /* 42 * apicid, cpu, node mappings 43 */ 44 s16 __cpuid_to_node[CONFIG_NR_CPUS] = { 45 [0 ... CONFIG_NR_CPUS - 1] = NUMA_NO_NODE 46 }; 47 EXPORT_SYMBOL(__cpuid_to_node); 48 49 nodemask_t numa_nodes_parsed __initdata; 50 51 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA 52 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 53 EXPORT_SYMBOL(__per_cpu_offset); 54 55 static int __init pcpu_cpu_to_node(int cpu) 56 { 57 return early_cpu_to_node(cpu); 58 } 59 60 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) 61 { 62 if (early_cpu_to_node(from) == early_cpu_to_node(to)) 63 return LOCAL_DISTANCE; 64 else 65 return REMOTE_DISTANCE; 66 } 67 68 void __init pcpu_populate_pte(unsigned long addr) 69 { 70 pgd_t *pgd = pgd_offset_k(addr); 71 p4d_t *p4d = p4d_offset(pgd, addr); 72 pud_t *pud; 73 pmd_t *pmd; 74 75 if (p4d_none(*p4d)) { 76 pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 77 if (!pud) 78 panic("%s: Failed to allocate memory\n", __func__); 79 p4d_populate(&init_mm, p4d, pud); 80 #ifndef __PAGETABLE_PUD_FOLDED 81 pud_init(pud); 82 #endif 83 } 84 85 pud = pud_offset(p4d, addr); 86 if (pud_none(*pud)) { 87 pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 88 if (!pmd) 89 panic("%s: Failed to allocate memory\n", __func__); 90 pud_populate(&init_mm, pud, pmd); 91 #ifndef __PAGETABLE_PMD_FOLDED 92 pmd_init(pmd); 93 #endif 94 } 95 96 pmd = pmd_offset(pud, addr); 97 if (!pmd_present(*pmd)) { 98 pte_t *pte; 99 100 pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 101 if (!pte) 102 panic("%s: Failed to allocate memory\n", __func__); 103 pmd_populate_kernel(&init_mm, pmd, pte); 104 } 105 } 106 107 void __init setup_per_cpu_areas(void) 108 { 109 unsigned long delta; 110 unsigned int cpu; 111 int rc = -EINVAL; 112 113 if (pcpu_chosen_fc == PCPU_FC_AUTO) { 114 if (nr_node_ids >= 8) 115 pcpu_chosen_fc = PCPU_FC_PAGE; 116 else 117 pcpu_chosen_fc = PCPU_FC_EMBED; 118 } 119 120 /* 121 * Always reserve area for module percpu variables. That's 122 * what the legacy allocator did. 123 */ 124 if (pcpu_chosen_fc != PCPU_FC_PAGE) { 125 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, 126 PERCPU_DYNAMIC_RESERVE, PMD_SIZE, 127 pcpu_cpu_distance, pcpu_cpu_to_node); 128 if (rc < 0) 129 pr_warn("%s allocator failed (%d), falling back to page size\n", 130 pcpu_fc_names[pcpu_chosen_fc], rc); 131 } 132 if (rc < 0) 133 rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node); 134 if (rc < 0) 135 panic("cannot initialize percpu area (err=%d)", rc); 136 137 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 138 for_each_possible_cpu(cpu) 139 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 140 } 141 #endif 142 143 /* 144 * Get nodeid by logical cpu number. 145 * __cpuid_to_node maps phyical cpu id to node, so we 146 * should use cpu_logical_map(cpu) to index it. 147 * 148 * This routine is only used in early phase during 149 * booting, after setup_per_cpu_areas calling and numa_node 150 * initialization, cpu_to_node will be used instead. 151 */ 152 int early_cpu_to_node(int cpu) 153 { 154 int physid = cpu_logical_map(cpu); 155 156 if (physid < 0) 157 return NUMA_NO_NODE; 158 159 return __cpuid_to_node[physid]; 160 } 161 162 void __init early_numa_add_cpu(int cpuid, s16 node) 163 { 164 int cpu = __cpu_number_map[cpuid]; 165 166 if (cpu < 0) 167 return; 168 169 cpumask_set_cpu(cpu, &cpus_on_node[node]); 170 cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]); 171 } 172 173 void numa_add_cpu(unsigned int cpu) 174 { 175 int nid = cpu_to_node(cpu); 176 cpumask_set_cpu(cpu, &cpus_on_node[nid]); 177 } 178 179 void numa_remove_cpu(unsigned int cpu) 180 { 181 int nid = cpu_to_node(cpu); 182 cpumask_clear_cpu(cpu, &cpus_on_node[nid]); 183 } 184 185 static int __init numa_add_memblk_to(int nid, u64 start, u64 end, 186 struct numa_meminfo *mi) 187 { 188 /* ignore zero length blks */ 189 if (start == end) 190 return 0; 191 192 /* whine about and ignore invalid blks */ 193 if (start > end || nid < 0 || nid >= MAX_NUMNODES) { 194 pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", 195 nid, start, end - 1); 196 return 0; 197 } 198 199 if (mi->nr_blks >= NR_NODE_MEMBLKS) { 200 pr_err("NUMA: too many memblk ranges\n"); 201 return -EINVAL; 202 } 203 204 mi->blk[mi->nr_blks].start = PFN_ALIGN(start); 205 mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1); 206 mi->blk[mi->nr_blks].nid = nid; 207 mi->nr_blks++; 208 return 0; 209 } 210 211 /** 212 * numa_add_memblk - Add one numa_memblk to numa_meminfo 213 * @nid: NUMA node ID of the new memblk 214 * @start: Start address of the new memblk 215 * @end: End address of the new memblk 216 * 217 * Add a new memblk to the default numa_meminfo. 218 * 219 * RETURNS: 220 * 0 on success, -errno on failure. 221 */ 222 int __init numa_add_memblk(int nid, u64 start, u64 end) 223 { 224 return numa_add_memblk_to(nid, start, end, &numa_meminfo); 225 } 226 227 static void __init alloc_node_data(int nid) 228 { 229 void *nd; 230 unsigned long nd_pa; 231 size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE); 232 233 nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid); 234 if (!nd_pa) { 235 pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid); 236 return; 237 } 238 239 nd = __va(nd_pa); 240 241 node_data[nid] = nd; 242 memset(nd, 0, sizeof(pg_data_t)); 243 } 244 245 static void __init node_mem_init(unsigned int node) 246 { 247 unsigned long start_pfn, end_pfn; 248 unsigned long node_addrspace_offset; 249 250 node_addrspace_offset = nid_to_addrbase(node); 251 pr_info("Node%d's addrspace_offset is 0x%lx\n", 252 node, node_addrspace_offset); 253 254 get_pfn_range_for_nid(node, &start_pfn, &end_pfn); 255 pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", 256 node, start_pfn, end_pfn); 257 258 alloc_node_data(node); 259 } 260 261 #ifdef CONFIG_ACPI_NUMA 262 263 /* 264 * Sanity check to catch more bad NUMA configurations (they are amazingly 265 * common). Make sure the nodes cover all memory. 266 */ 267 static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) 268 { 269 int i; 270 u64 numaram, biosram; 271 272 numaram = 0; 273 for (i = 0; i < mi->nr_blks; i++) { 274 u64 s = mi->blk[i].start >> PAGE_SHIFT; 275 u64 e = mi->blk[i].end >> PAGE_SHIFT; 276 277 numaram += e - s; 278 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); 279 if ((s64)numaram < 0) 280 numaram = 0; 281 } 282 max_pfn = max_low_pfn; 283 biosram = max_pfn - absent_pages_in_range(0, max_pfn); 284 285 BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT))); 286 return true; 287 } 288 289 static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type) 290 { 291 static unsigned long num_physpages; 292 293 num_physpages += (size >> PAGE_SHIFT); 294 pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", 295 node, type, start, size); 296 pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", 297 start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages); 298 memblock_set_node(start, size, &memblock.memory, node); 299 } 300 301 /* 302 * add_numamem_region 303 * 304 * Add a uasable memory region described by BIOS. The 305 * routine gets each intersection between BIOS's region 306 * and node's region, and adds them into node's memblock 307 * pool. 308 * 309 */ 310 static void __init add_numamem_region(u64 start, u64 end, u32 type) 311 { 312 u32 i; 313 u64 ofs = start; 314 315 if (start >= end) { 316 pr_debug("Invalid region: %016llx-%016llx\n", start, end); 317 return; 318 } 319 320 for (i = 0; i < numa_meminfo.nr_blks; i++) { 321 struct numa_memblk *mb = &numa_meminfo.blk[i]; 322 323 if (ofs > mb->end) 324 continue; 325 326 if (end > mb->end) { 327 add_node_intersection(mb->nid, ofs, mb->end - ofs, type); 328 ofs = mb->end; 329 } else { 330 add_node_intersection(mb->nid, ofs, end - ofs, type); 331 break; 332 } 333 } 334 } 335 336 static void __init init_node_memblock(void) 337 { 338 u32 mem_type; 339 u64 mem_end, mem_start, mem_size; 340 efi_memory_desc_t *md; 341 342 /* Parse memory information and activate */ 343 for_each_efi_memory_desc(md) { 344 mem_type = md->type; 345 mem_start = md->phys_addr; 346 mem_size = md->num_pages << EFI_PAGE_SHIFT; 347 mem_end = mem_start + mem_size; 348 349 switch (mem_type) { 350 case EFI_LOADER_CODE: 351 case EFI_LOADER_DATA: 352 case EFI_BOOT_SERVICES_CODE: 353 case EFI_BOOT_SERVICES_DATA: 354 case EFI_PERSISTENT_MEMORY: 355 case EFI_CONVENTIONAL_MEMORY: 356 add_numamem_region(mem_start, mem_end, mem_type); 357 break; 358 case EFI_PAL_CODE: 359 case EFI_UNUSABLE_MEMORY: 360 case EFI_ACPI_RECLAIM_MEMORY: 361 add_numamem_region(mem_start, mem_end, mem_type); 362 fallthrough; 363 case EFI_RESERVED_TYPE: 364 case EFI_RUNTIME_SERVICES_CODE: 365 case EFI_RUNTIME_SERVICES_DATA: 366 case EFI_MEMORY_MAPPED_IO: 367 case EFI_MEMORY_MAPPED_IO_PORT_SPACE: 368 pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", 369 mem_type, mem_start, mem_size); 370 break; 371 } 372 } 373 } 374 375 static void __init numa_default_distance(void) 376 { 377 int row, col; 378 379 for (row = 0; row < MAX_NUMNODES; row++) 380 for (col = 0; col < MAX_NUMNODES; col++) { 381 if (col == row) 382 node_distances[row][col] = LOCAL_DISTANCE; 383 else 384 /* We assume that one node per package here! 385 * 386 * A SLIT should be used for multiple nodes 387 * per package to override default setting. 388 */ 389 node_distances[row][col] = REMOTE_DISTANCE; 390 } 391 } 392 393 /* 394 * fake_numa_init() - For Non-ACPI systems 395 * Return: 0 on success, -errno on failure. 396 */ 397 static int __init fake_numa_init(void) 398 { 399 phys_addr_t start = memblock_start_of_DRAM(); 400 phys_addr_t end = memblock_end_of_DRAM() - 1; 401 402 node_set(0, numa_nodes_parsed); 403 pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end); 404 405 return numa_add_memblk(0, start, end + 1); 406 } 407 408 int __init init_numa_memory(void) 409 { 410 int i; 411 int ret; 412 int node; 413 414 for (i = 0; i < NR_CPUS; i++) 415 set_cpuid_to_node(i, NUMA_NO_NODE); 416 417 numa_default_distance(); 418 nodes_clear(numa_nodes_parsed); 419 nodes_clear(node_possible_map); 420 nodes_clear(node_online_map); 421 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 422 423 /* Parse SRAT and SLIT if provided by firmware. */ 424 ret = acpi_disabled ? fake_numa_init() : acpi_numa_init(); 425 if (ret < 0) 426 return ret; 427 428 node_possible_map = numa_nodes_parsed; 429 if (WARN_ON(nodes_empty(node_possible_map))) 430 return -EINVAL; 431 432 init_node_memblock(); 433 if (numa_meminfo_cover_memory(&numa_meminfo) == false) 434 return -EINVAL; 435 436 for_each_node_mask(node, node_possible_map) { 437 node_mem_init(node); 438 node_set_online(node); 439 } 440 max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); 441 442 setup_nr_node_ids(); 443 loongson_sysconf.nr_nodes = nr_node_ids; 444 loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]); 445 446 return 0; 447 } 448 449 #endif 450 451 void __init paging_init(void) 452 { 453 unsigned int node; 454 unsigned long zones_size[MAX_NR_ZONES] = {0, }; 455 456 for_each_online_node(node) { 457 unsigned long start_pfn, end_pfn; 458 459 get_pfn_range_for_nid(node, &start_pfn, &end_pfn); 460 461 if (end_pfn > max_low_pfn) 462 max_low_pfn = end_pfn; 463 } 464 #ifdef CONFIG_ZONE_DMA32 465 zones_size[ZONE_DMA32] = MAX_DMA32_PFN; 466 #endif 467 zones_size[ZONE_NORMAL] = max_low_pfn; 468 free_area_init(zones_size); 469 } 470 471 void __init mem_init(void) 472 { 473 high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); 474 memblock_free_all(); 475 setup_zero_pages(); /* This comes from node 0 */ 476 } 477 478 int pcibus_to_node(struct pci_bus *bus) 479 { 480 return dev_to_node(&bus->dev); 481 } 482 EXPORT_SYMBOL(pcibus_to_node); 483