xref: /openbmc/linux/arch/loongarch/kernel/numa.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
// SPDX-License-Identifier: GPL-2.0
/*
 * Author:  Xiang Gao <gaoxiang@loongson.cn>
 *          Huacai Chen <chenhuacai@loongson.cn>
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <asm/bootinfo.h>
#include <asm/loongson.h>
#include <asm/numa.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/time.h>

int numa_off;
struct pglist_data *node_data[MAX_NUMNODES];
unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];

EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(node_distances);

static struct numa_meminfo numa_meminfo;
cpumask_t cpus_on_node[MAX_NUMNODES];
cpumask_t phys_cpus_on_node[MAX_NUMNODES];
EXPORT_SYMBOL(cpus_on_node);

/*
 * apicid, cpu, node mappings
 */
s16 __cpuid_to_node[CONFIG_NR_CPUS] = {
	[0 ... CONFIG_NR_CPUS - 1] = NUMA_NO_NODE
};
EXPORT_SYMBOL(__cpuid_to_node);
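
/*
 * Example (illustrative, hypothetical values): if logical CPU 2 has
 * physical id 6 (i.e. cpu_logical_map(2) == 6) and sits on node 1,
 * then __cpuid_to_node[6] == 1; entries for CPUs that have not been
 * enumerated yet stay NUMA_NO_NODE.
 */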

nodemask_t numa_nodes_parsed __initdata;

#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static int __init pcpu_cpu_to_node(int cpu)
{
	return early_cpu_to_node(cpu);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (early_cpu_to_node(from) == early_cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

void __init pcpu_populate_pte(unsigned long addr)
{
	populate_kernel_pte(addr);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
		if (nr_node_ids >= 8)
			pcpu_chosen_fc = PCPU_FC_PAGE;
		else
			pcpu_chosen_fc = PCPU_FC_EMBED;
	}

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
					    pcpu_cpu_distance, pcpu_cpu_to_node);
		if (rc < 0)
			pr_warn("%s allocator failed (%d), falling back to page size\n",
				pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
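
/*
 * Illustrative sketch (assuming the generic per_cpu_offset()
 * definition): with the offsets computed above, a per-CPU accessor
 * resolves the address of a per-CPU variable for a given CPU
 * roughly as
 *
 *	&var + __per_cpu_offset[cpu]
 */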
#endif

/*
 * Get the node id of a logical cpu.
 * __cpuid_to_node maps a physical cpu id to its node, so we
 * must use cpu_logical_map(cpu) to index it.
 *
 * This routine is only used in the early phase of booting;
 * once setup_per_cpu_areas() has run and numa_node is
 * initialized, cpu_to_node() is used instead.
 */
int early_cpu_to_node(int cpu)
{
	int physid = cpu_logical_map(cpu);

	if (physid < 0)
		return NUMA_NO_NODE;

	return __cpuid_to_node[physid];
}
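
/*
 * For example, pcpu_cpu_to_node() above relies on this helper while
 * setting up the per-CPU first chunk, before cpu_to_node() is usable.
 * A hypothetical early caller:
 *
 *	int nid = early_cpu_to_node(cpu);
 *
 * nid is NUMA_NO_NODE when the cpu has no mapping yet.
 */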

void __init early_numa_add_cpu(int cpuid, s16 node)
{
	int cpu = __cpu_number_map[cpuid];

	if (cpu < 0)
		return;

	cpumask_set_cpu(cpu, &cpus_on_node[node]);
	cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]);
}

void numa_add_cpu(unsigned int cpu)
{
	int nid = cpu_to_node(cpu);
	cpumask_set_cpu(cpu, &cpus_on_node[nid]);
}

void numa_remove_cpu(unsigned int cpu)
{
	int nid = cpu_to_node(cpu);
	cpumask_clear_cpu(cpu, &cpus_on_node[nid]);
}

static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
				     struct numa_meminfo *mi)
{
	/* ignore zero length blks */
	if (start == end)
		return 0;

	/* whine about and ignore invalid blks */
	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
		pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
			   nid, start, end - 1);
		return 0;
	}

	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
		pr_err("NUMA: too many memblk ranges\n");
		return -EINVAL;
	}

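	/*
	 * Round start up and end down to page boundaries, so only
	 * fully covered pages are recorded for this block.
	 */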
	mi->blk[mi->nr_blks].start = PFN_ALIGN(start);
	mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1);
	mi->blk[mi->nr_blks].nid = nid;
	mi->nr_blks++;
	return 0;
}

/**
 * numa_add_memblk - Add one numa_memblk to numa_meminfo
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * Add a new memblk to the default numa_meminfo.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
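
/*
 * Illustrative call (hypothetical addresses): an SRAT memory affinity
 * entry for node 0 covering the first 256 MiB would be registered as
 *
 *	numa_add_memblk(0, 0x0ULL, 0x10000000ULL);
 */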

static void __init alloc_node_data(int nid)
{
	void *nd;
	unsigned long nd_pa;
	size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE);

	nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid);
	if (!nd_pa) {
		pr_err("Cannot find %zu bytes for node_data (initial node: %d)\n", nd_sz, nid);
		return;
	}

	nd = __va(nd_pa);

	node_data[nid] = nd;
	memset(nd, 0, sizeof(pg_data_t));
}

static void __init node_mem_init(unsigned int node)
{
	unsigned long start_pfn, end_pfn;
	unsigned long node_addrspace_offset;

	node_addrspace_offset = nid_to_addrbase(node);
	pr_info("Node%d's addrspace_offset is 0x%lx\n",
			node, node_addrspace_offset);

	get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
	pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
		node, start_pfn, end_pfn);

	alloc_node_data(node);
}

#ifdef CONFIG_ACPI_NUMA

static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type)
{
	static unsigned long num_physpages;

	num_physpages += (size >> PAGE_SHIFT);
	pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
		node, type, start, size);
	pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
		start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages);
	memblock_set_node(start, size, &memblock.memory, node);
}

/*
 * add_numamem_region
 *
 * Add a usable memory region described by the BIOS. The routine
 * computes each intersection between the BIOS region and the nodes'
 * regions, and adds the intersections to the nodes' memblock pools.
 */
static void __init add_numamem_region(u64 start, u64 end, u32 type)
{
	u32 i;
	u64 ofs = start;

	if (start >= end) {
		pr_debug("Invalid region: %016llx-%016llx\n", start, end);
		return;
	}

	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = &numa_meminfo.blk[i];

		if (ofs > mb->end)
			continue;

		if (end > mb->end) {
			add_node_intersection(mb->nid, ofs, mb->end - ofs, type);
			ofs = mb->end;
		} else {
			add_node_intersection(mb->nid, ofs, end - ofs, type);
			break;
		}
	}
}
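
/*
 * Worked example (hypothetical layout): with two recorded blocks,
 * node 0 at [0x0, 0x10000000) and node 1 at [0x10000000, 0x20000000),
 * a BIOS region [0x08000000, 0x18000000) is split into
 * [0x08000000, 0x10000000) for node 0 and [0x10000000, 0x18000000)
 * for node 1.
 */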

static void __init init_node_memblock(void)
{
	u32 mem_type;
	u64 mem_end, mem_start, mem_size;
	efi_memory_desc_t *md;

	/* Parse memory information and activate */
	for_each_efi_memory_desc(md) {
		mem_type = md->type;
		mem_start = md->phys_addr;
		mem_size = md->num_pages << EFI_PAGE_SHIFT;
		mem_end = mem_start + mem_size;

		switch (mem_type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_PERSISTENT_MEMORY:
		case EFI_CONVENTIONAL_MEMORY:
			add_numamem_region(mem_start, mem_end, mem_type);
			break;
		case EFI_PAL_CODE:
		case EFI_UNUSABLE_MEMORY:
		case EFI_ACPI_RECLAIM_MEMORY:
			add_numamem_region(mem_start, mem_end, mem_type);
			fallthrough;
		case EFI_RESERVED_TYPE:
		case EFI_RUNTIME_SERVICES_CODE:
		case EFI_RUNTIME_SERVICES_DATA:
		case EFI_MEMORY_MAPPED_IO:
		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
			pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
					mem_type, mem_start, mem_size);
			break;
		}
	}
}

static void __init numa_default_distance(void)
{
	int row, col;

	for (row = 0; row < MAX_NUMNODES; row++)
		for (col = 0; col < MAX_NUMNODES; col++) {
			if (col == row)
				node_distances[row][col] = LOCAL_DISTANCE;
			else
				/*
				 * We assume one node per package here.
				 *
				 * A SLIT should be used for multiple
				 * nodes per package to override this
				 * default setting.
				 */
				node_distances[row][col] = REMOTE_DISTANCE;
		}
}
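
/*
 * For example, on a hypothetical 2-node system with no SLIT, this
 * yields (LOCAL_DISTANCE == 10, REMOTE_DISTANCE == 20):
 *
 *	node_distances = { { 10, 20 },
 *	                   { 20, 10 } };
 */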

/*
 * fake_numa_init() - set up a single fake node for non-ACPI systems.
 * Return: 0 on success, -errno on failure.
 */
static int __init fake_numa_init(void)
{
	phys_addr_t start = memblock_start_of_DRAM();
	phys_addr_t end = memblock_end_of_DRAM() - 1;

	node_set(0, numa_nodes_parsed);
	pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);

	return numa_add_memblk(0, start, end + 1);
}

int __init init_numa_memory(void)
{
	int i;
	int ret;
	int node;

	for (i = 0; i < NR_CPUS; i++)
		set_cpuid_to_node(i, NUMA_NO_NODE);

	numa_default_distance();
	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
	memset(&numa_meminfo, 0, sizeof(numa_meminfo));

	/* Parse SRAT and SLIT if provided by firmware. */
	ret = acpi_disabled ? fake_numa_init() : acpi_numa_init();
	if (ret < 0)
		return ret;

	node_possible_map = numa_nodes_parsed;
	if (WARN_ON(nodes_empty(node_possible_map)))
		return -EINVAL;

	init_node_memblock();
	if (!memblock_validate_numa_coverage(SZ_1M))
		return -EINVAL;

	for_each_node_mask(node, node_possible_map) {
		node_mem_init(node);
		node_set_online(node);
	}
	max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());

	setup_nr_node_ids();
	loongson_sysconf.nr_nodes = nr_node_ids;
	loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]);

	return 0;
}

#endif

void __init paging_init(void)
{
	unsigned int node;
	unsigned long zones_size[MAX_NR_ZONES] = {0, };

	for_each_online_node(node) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(node, &start_pfn, &end_pfn);

		if (end_pfn > max_low_pfn)
			max_low_pfn = end_pfn;
	}
#ifdef CONFIG_ZONE_DMA32
	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
	zones_size[ZONE_NORMAL] = max_low_pfn;
	free_area_init(zones_size);
}

void __init mem_init(void)
{
	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
	memblock_free_all();
}

int pcibus_to_node(struct pci_bus *bus)
{
	return dev_to_node(&bus->dev);
}
EXPORT_SYMBOL(pcibus_to_node);