xref: /openbmc/linux/arch/ia64/mm/discontig.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *	Russ Anderson <rja@sgi.com>
 *	Jesse Barnes <jbarnes@sgi.com>
 *	Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/slab.h>
#include <asm/efi.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
	struct ia64_node_data *node_data;
	unsigned long pernode_addr;
	unsigned long pernode_size;
	unsigned long min_pfn;
	unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)						\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) +				\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
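
/*
 * Worked example (numbers purely illustrative; PERCPU_PAGE_SIZE is
 * configuration dependent): assuming a 64KB PERCPU_PAGE_SIZE,
 * NODEDATA_ALIGN(0x4080000, 3) first rounds the address up to a 1MB
 * boundary (0x4100000) and then adds 3 * 0x10000 = 0x30000, giving
 * 0x4130000.  Each node's area thus starts at a distinct offset modulo
 * MAX_NODE_ALIGN_OFFSET, which is what defeats the cache aliasing.
 */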

/**
 * build_node_maps - callback to setup mem_data structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Detect extents of each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
				  int node)
{
	unsigned long spfn, epfn, end = start + len;

	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

	if (!mem_data[node].min_pfn) {
		mem_data[node].min_pfn = spfn;
		mem_data[node].max_pfn = epfn;
	} else {
		mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
		mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
	}

	return 0;
}
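
/*
 * For example, assuming a 16MB IA64_GRANULE_SIZE (it is configuration
 * dependent): a range of [0x4100000, 0x5100000) is recorded with
 * min_pfn/max_pfn covering [0x4000000, 0x6000000), i.e. rounded outward
 * to whole granules.  Pages within that span that don't really exist
 * end up backed by the virtual memmap.
 */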

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int early_nr_cpus_node(int node)
{
	int cpu, n = 0;

	for_each_possible_early_cpu(cpu)
		if (node == node_cpuid[cpu].nid)
			n++;

	return n;
}

/**
 * compute_pernodesize - compute size of pernode data
 * @node: the node id.
 */
static unsigned long compute_pernodesize(int node)
{
	unsigned long pernodesize = 0, cpus;

	cpus = early_nr_cpus_node(node);
	pernodesize += PERCPU_PAGE_SIZE * cpus;
	pernodesize += node * L1_CACHE_BYTES;
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize = PAGE_ALIGN(pernodesize);
	return pernodesize;
}

/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node
 * @node: node to setup
 *
 * Copy the static per-cpu data into the region we just set aside and then
 * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
 * the end of the area.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

		if (node != node_cpuid[cpu].nid)
			continue;

		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
			__per_cpu_start;

		/*
		 * The percpu area for cpu0 is moved from the __init area,
		 * which is set up by head.S and used until this point.
		 * Update ar.k3.  This move ensures that the percpu area
		 * for cpu0 is on the correct node and that its virtual
		 * address isn't insanely far from the other percpu areas,
		 * which is important for the congruent percpu allocator.
		 */
		if (cpu == 0)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
	struct pcpu_alloc_info *ai;
	struct pcpu_group_info *gi;
	unsigned int *cpu_map;
	void *base;
	unsigned long base_offset;
	unsigned int cpu;
	ssize_t static_size, reserved_size, dyn_size;
	int node, prev_node, unit, nr_units;

	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
	if (!ai)
		panic("failed to allocate pcpu_alloc_info");
	cpu_map = ai->groups[0].cpu_map;

	/* determine base */
	base = (void *)ULONG_MAX;
	for_each_possible_cpu(cpu)
		base = min(base,
			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
	base_offset = (void *)__per_cpu_start - base;

	/* build cpu_map, units are grouped by node */
	unit = 0;
	for_each_node(node)
		for_each_possible_cpu(cpu)
			if (node == node_cpuid[cpu].nid)
				cpu_map[unit++] = cpu;
	nr_units = unit;

	/* set basic parameters */
	static_size = __per_cpu_end - __per_cpu_start;
	reserved_size = PERCPU_MODULE_RESERVE;
	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
	if (dyn_size < 0)
		panic("percpu area overflow static=%zd reserved=%zd\n",
		      static_size, reserved_size);

	ai->static_size		= static_size;
	ai->reserved_size	= reserved_size;
	ai->dyn_size		= dyn_size;
	ai->unit_size		= PERCPU_PAGE_SIZE;
	ai->atom_size		= PAGE_SIZE;
	ai->alloc_size		= PERCPU_PAGE_SIZE;

	/*
	 * CPUs are put into groups according to node.  Walk cpu_map
	 * and create new groups at node boundaries.
	 */
	prev_node = NUMA_NO_NODE;
	ai->nr_groups = 0;
	for (unit = 0; unit < nr_units; unit++) {
		cpu = cpu_map[unit];
		node = node_cpuid[cpu].nid;

		if (node == prev_node) {
			gi->nr_units++;
			continue;
		}
		prev_node = node;

		gi = &ai->groups[ai->nr_groups++];
		gi->nr_units		= 1;
		gi->base_offset		= __per_cpu_offset[cpu] + base_offset;
		gi->cpu_map		= &cpu_map[unit];
	}

	pcpu_setup_first_chunk(ai, base);
	pcpu_free_alloc_info(ai);
}
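
/*
 * Sketch of the result for a hypothetical box with cpus 0-1 on node 0
 * and cpus 2-3 on node 1 (offsets illustrative only):
 *
 *	cpu_map[]	= { 0, 1, 2, 3 }
 *	groups[0]	= { .nr_units = 2, .cpu_map = &cpu_map[0],
 *			    .base_offset = <node 0 percpu area> - base }
 *	groups[1]	= { .nr_units = 2, .cpu_map = &cpu_map[2],
 *			    .base_offset = <node 1 percpu area> - base }
 *
 * i.e. one group per node with cpus, matching the static layout that
 * per_cpu_node_setup() established above.
 */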
#endif

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 */
static void __init fill_pernode(int node, unsigned long pernode,
	unsigned long pernodesize)
{
	void *cpu_data;
	int cpus = early_nr_cpus_node(node);

	mem_data[node].pernode_addr = pernode;
	mem_data[node].pernode_size = pernodesize;
	memset(__va(pernode), 0, pernodesize);

	cpu_data = (void *)pernode;
	pernode += PERCPU_PAGE_SIZE * cpus;
	pernode += node * L1_CACHE_BYTES;

	pgdat_list[node] = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	mem_data[node].node_data = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	cpu_data = per_cpu_node_setup(cpu_data, node);

	return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something like
 * the following in the first chunk of addr. space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
				     int node)
{
	unsigned long spfn, epfn;
	unsigned long pernodesize = 0, pernode;

	spfn = start >> PAGE_SHIFT;
	epfn = (start + len) >> PAGE_SHIFT;

	/*
	 * Make sure this memory falls within this node's usable memory
	 * since we may have thrown some away in build_maps().
	 */
	if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
		return 0;

	/* Don't setup this node's local space twice... */
	if (mem_data[node].pernode_addr)
		return 0;

	/*
	 * Calculate total size needed, incl. what's necessary
	 * for good alignment and alias prevention.
	 */
	pernodesize = compute_pernodesize(node);
	pernode = NODEDATA_ALIGN(start, node);

	/* Is this range big enough for what we want to store here? */
	if (start + len > (pernode + pernodesize))
		fill_pernode(node, pernode, pernodesize);

	return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
	unsigned long base, size;
	int node;

	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;

		/* Now the per-node space */
		size = mem_data[node].pernode_size;
		base = __pa(mem_data[node].pernode_addr);
		memblock_reserve(base, size);
	}
}

static void scatter_node_data(void)
{
	pg_data_t **dst;
	int node;

	/*
	 * for_each_online_node() can't be used here.
	 * node_online_map is not set for hot-added nodes at this time,
	 * because we are halfway through initialization of the new node's
	 * structures.  If for_each_online_node() is used, a new node's
	 * pg_data_ptrs will not be initialized.  Instead of using it,
	 * pgdat_list[] is checked.
	 */
	for_each_node(node) {
		if (pgdat_list[node]) {
			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
			memcpy(dst, pgdat_list, sizeof(pgdat_list));
		}
	}
}
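
/*
 * After this runs, every node with a pernode area holds an identical
 * copy of pgdat_list[] in its local ia64_node_data, so a lookup such
 * as NODE_DATA(nid) can be satisfied from node-local memory instead of
 * chasing a pointer into another node's RAM.
 */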

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.
 */
static void __init initialize_pernode_data(void)
{
	int cpu, node;

	scatter_node_data();

#ifdef CONFIG_SMP
	/* Set the node_data pointer for each per-cpu struct */
	for_each_possible_early_cpu(cpu) {
		node = node_cpuid[cpu].nid;
		per_cpu(ia64_cpu_info, cpu).node_data =
			mem_data[node].node_data;
	}
#else
	{
		struct cpuinfo_ia64 *cpu0_cpu_info;
		cpu = 0;
		node = node_cpuid[cpu].nid;
		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
			((char *)&ia64_cpu_info - __per_cpu_start));
		cpu0_cpu_info->node_data = mem_data[node].node_data;
	}
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate memory on the best NUMA
 *	SLIT node, but fall back to any other node when allocation for
 *	the best node fails.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
	void *ptr = NULL;
	u8 best = 0xff;
	int bestnode = NUMA_NO_NODE, node, anynode = 0;

	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;
		else if (node_distance(nid, node) < best) {
			best = node_distance(nid, node);
			bestnode = node;
		}
		anynode = node;
	}

	if (bestnode == NUMA_NO_NODE)
		bestnode = anynode;

	ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
				     __pa(MAX_DMA_ADDRESS),
				     MEMBLOCK_ALLOC_ACCESSIBLE,
				     bestnode);
	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n",
		      __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
		      __pa(MAX_DMA_ADDRESS));

	return ptr;
}
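
/*
 * Example with hypothetical SLIT distances: if memoryless node 2 sees
 * node_distance(2, {0, 1, 3}) = {21, 10, 21} and none of those nodes
 * is itself memoryless, node 1 wins as bestnode, and node 2's pernode
 * area is allocated from node 1's memory.
 */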

/**
 * memory_less_nodes - allocate and initialize CPU-only nodes' pernode
 *	information.
 */
static void __init memory_less_nodes(void)
{
	unsigned long pernodesize;
	void *pernode;
	int node;

	for_each_node_mask(node, memory_less_mask) {
		pernodesize = compute_pernodesize(node);
		pernode = memory_less_node_alloc(node, pernodesize);
		fill_pernode(node, __pa(pernode), pernodesize);
	}

	return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
	int node;

	reserve_memory();
	efi_memmap_walk(filter_memory, register_active_ranges);

	if (num_online_nodes() == 0) {
		printk(KERN_ERR "node info missing!\n");
		node_set_online(0);
	}

	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
	min_low_pfn = -1;
	max_low_pfn = 0;

	/* These actually end up getting called by call_pernode_memory() */
	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
	efi_memmap_walk(find_max_min_low_pfn, NULL);

	for_each_online_node(node)
		if (mem_data[node].min_pfn)
			node_clear(node, memory_less_mask);

	reserve_pernode_space();
	memory_less_nodes();
	initialize_pernode_data();

	max_pfn = max_low_pfn;

	find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already; we just need to set
 * local_per_cpu_offset.
 */
void *per_cpu_init(void)
{
	int cpu;
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		for_each_possible_early_cpu(cpu)
			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
	}

	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
	unsigned long rs, re, end = start + len;
	void (*func)(unsigned long, unsigned long, int);
	int i;

	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;
	if (start >= end)
		return;

	func = arg;

	if (!num_node_memblks) {
		/* No SRAT table, so assume one node (node 0) */
		if (start < end)
			(*func)(start, end - start, 0);
		return;
	}

	for (i = 0; i < num_node_memblks; i++) {
		rs = max(start, node_memblk[i].start_paddr);
		re = min(end, node_memblk[i].start_paddr +
			 node_memblk[i].size);

		if (rs < re)
			(*func)(rs, re - rs, node_memblk[i].nid);

		if (re == end)
			break;
	}
}
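
/*
 * Example (SRAT layout hypothetical): with two memblks,
 * [0x0, 0x4000000) on node 0 and [0x4000000, 0x8000000) on node 1,
 * a call for the physical range [0x3f00000, 0x4100000) invokes func
 * twice, once per node:
 *
 *	(*func)(0x3f00000, 0x100000, 0);
 *	(*func)(0x4000000, 0x100000, 1);
 */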

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
	unsigned long max_dma;
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

	sparse_init();

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA32] = max_dma;
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
	free_area_init(max_zone_pfns);

	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

pg_data_t * __init arch_alloc_nodedata(int nid)
{
	unsigned long size = compute_pernodesize(nid);

	return memblock_alloc(size, SMP_CACHE_BYTES);
}

void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
	pgdat_list[update_node] = update_pgdat;
	scatter_node_data();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}

void vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
}
#endif