// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *	Russ Anderson <rja@sgi.com>
 *	Jesse Barnes <jbarnes@sgi.com>
 *	Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/slab.h>
#include <asm/efi.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
	struct ia64_node_data *node_data;
	unsigned long pernode_addr;
	unsigned long pernode_size;
	unsigned long min_pfn;
	unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)					\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) +			\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
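
/*
 * For illustration of NODEDATA_ALIGN() above (example numbers, not taken
 * from this file): with a 64KB PERCPU_PAGE_SIZE, node 0's area starts on
 * a 1MB boundary, node 1's a further 64KB in, node 2's 128KB in, and so
 * on, wrapping every 32MB, so equivalent hot structures on nearby nodes
 * do not land on the same cache lines.
 */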

/**
 * build_node_maps - callback to setup mem_data structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Detect extents of each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
				  int node)
{
	unsigned long spfn, epfn, end = start + len;

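	/*
	 * Round outward to IA64_GRANULE_SIZE: spfn rounds down, epfn rounds
	 * up.  For illustration only (the granule size is configurable),
	 * with a 16MB granule a range of [0x1400000, 0x2400000) is recorded
	 * as the pfns covering [0x1000000, 0x3000000).
	 */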
	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

	if (!mem_data[node].min_pfn) {
		mem_data[node].min_pfn = spfn;
		mem_data[node].max_pfn = epfn;
	} else {
		mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
		mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
	}

	return 0;
}

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int early_nr_cpus_node(int node)
{
	int cpu, n = 0;

	for_each_possible_early_cpu(cpu)
		if (node == node_cpuid[cpu].nid)
			n++;

	return n;
}

/**
 * compute_pernodesize - compute size of pernode data
 * @node: the node id.
 */
static unsigned long compute_pernodesize(int node)
{
	unsigned long pernodesize = 0, cpus;

	cpus = early_nr_cpus_node(node);
	pernodesize += PERCPU_PAGE_SIZE * cpus;
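	/*
	 * The node * L1_CACHE_BYTES term below skews each node's layout by
	 * one cache line per node id, in the same anti-aliasing spirit as
	 * NODEDATA_ALIGN() (our reading; the skew is not otherwise
	 * documented here).
	 */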
	pernodesize += node * L1_CACHE_BYTES;
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize = PAGE_ALIGN(pernodesize);
	return pernodesize;
}

/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node
 * @node: node to setup
 *
 * Copy the static per-cpu data into the region we just set aside and then
 * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
 * the end of the area.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

		if (node != node_cpuid[cpu].nid)
			continue;

		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
			__per_cpu_start;

		/*
		 * percpu area for cpu0 is moved from the __init area
		 * which is setup by head.S and used till this point.
		 * Update ar.k3.  This move ensures that the percpu
		 * area for cpu0 is on the correct node and its
		 * virtual address isn't insanely far from other
		 * percpu areas, which is important for the congruent
		 * percpu allocator.
		 */
		if (cpu == 0)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
	struct pcpu_alloc_info *ai;
	struct pcpu_group_info *gi;
	unsigned int *cpu_map;
	void *base;
	unsigned long base_offset;
	unsigned int cpu;
	ssize_t static_size, reserved_size, dyn_size;
	int node, prev_node, unit, nr_units;

	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
	if (!ai)
		panic("failed to allocate pcpu_alloc_info");
	cpu_map = ai->groups[0].cpu_map;

	/* determine base */
	base = (void *)ULONG_MAX;
	for_each_possible_cpu(cpu)
		base = min(base,
			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
	base_offset = (void *)__per_cpu_start - base;
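	/*
	 * base is the lowest unit address across all cpus.  base_offset
	 * converts an "offset from __per_cpu_start" (what __per_cpu_offset[]
	 * holds) into an "offset from base", which is what the group
	 * base_offset fields filled in below must contain.
	 */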

	/* build cpu_map, units are grouped by node */
	unit = 0;
	for_each_node(node)
		for_each_possible_cpu(cpu)
			if (node == node_cpuid[cpu].nid)
				cpu_map[unit++] = cpu;
	nr_units = unit;

	/* set basic parameters */
	static_size = __per_cpu_end - __per_cpu_start;
	reserved_size = PERCPU_MODULE_RESERVE;
	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
	if (dyn_size < 0)
		panic("percpu area overflow static=%zd reserved=%zd\n",
		      static_size, reserved_size);

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = PERCPU_PAGE_SIZE;
	ai->atom_size = PAGE_SIZE;
	ai->alloc_size = PERCPU_PAGE_SIZE;
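	/*
	 * unit_size == alloc_size == PERCPU_PAGE_SIZE: each cpu's unit is
	 * exactly the page-sized slot that fill_pernode() already set
	 * aside, so no fresh per-cpu unit allocation is needed here.
	 */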

	/*
	 * CPUs are put into groups according to node.  Walk cpu_map
	 * and create new groups at node boundaries.
	 */
	prev_node = NUMA_NO_NODE;
	ai->nr_groups = 0;
	for (unit = 0; unit < nr_units; unit++) {
		cpu = cpu_map[unit];
		node = node_cpuid[cpu].nid;

		if (node == prev_node) {
			gi->nr_units++;
			continue;
		}
		prev_node = node;

		gi = &ai->groups[ai->nr_groups++];
		gi->nr_units = 1;
		gi->base_offset = __per_cpu_offset[cpu] + base_offset;
		gi->cpu_map = &cpu_map[unit];
	}

	pcpu_setup_first_chunk(ai, base);
	pcpu_free_alloc_info(ai);
}
#endif

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 */
static void __init fill_pernode(int node, unsigned long pernode,
				unsigned long pernodesize)
{
	void *cpu_data;
	int cpus = early_nr_cpus_node(node);

	mem_data[node].pernode_addr = pernode;
	mem_data[node].pernode_size = pernodesize;
	memset(__va(pernode), 0, pernodesize);
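	/*
	 * The pointer walk below must match the layout and alignment used
	 * by compute_pernodesize(); keep the two in sync when changing
	 * either.
	 */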

	cpu_data = (void *)pernode;
	pernode += PERCPU_PAGE_SIZE * cpus;
	pernode += node * L1_CACHE_BYTES;

	pgdat_list[node] = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	mem_data[node].node_data = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	cpu_data = per_cpu_node_setup(cpu_data, node);

	return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something like
 * the following in the first chunk of addr. space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
				     int node)
{
	unsigned long spfn, epfn;
	unsigned long pernodesize = 0, pernode;

	spfn = start >> PAGE_SHIFT;
	epfn = (start + len) >> PAGE_SHIFT;

	/*
	 * Make sure this memory falls within this node's usable memory
	 * since we may have thrown some away in build_maps().
	 */
	if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
		return 0;

	/* Don't setup this node's local space twice... */
	if (mem_data[node].pernode_addr)
		return 0;

	/*
	 * Calculate total size needed, incl. what's necessary
	 * for good alignment and alias prevention.
	 */
	pernodesize = compute_pernodesize(node);
	pernode = NODEDATA_ALIGN(start, node);

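	/*
	 * If this range is too small we just return: pernode_addr stays
	 * unset, so a later, larger range on this node can host the
	 * per-node area instead.
	 */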
	/* Is this range big enough for what we want to store here? */
	if (start + len > (pernode + pernodesize))
		fill_pernode(node, pernode, pernodesize);

	return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
	unsigned long base, size;
	int node;

	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;

		/* Now the per-node space */
		size = mem_data[node].pernode_size;
		base = __pa(mem_data[node].pernode_addr);
		memblock_reserve(base, size);
	}
}

static void scatter_node_data(void)
{
	pg_data_t **dst;
	int node;

	/*
	 * for_each_online_node() can't be used here.
	 * node_online_map is not set for hot-added nodes at this time,
	 * because we are halfway through initialization of the new node's
	 * structures.  If for_each_online_node() is used, a new node's
	 * pg_data_ptrs will not be initialized.  Instead of using it,
	 * pgdat_list[] is checked.
	 */
	for_each_node(node) {
		if (pgdat_list[node]) {
			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
			memcpy(dst, pgdat_list, sizeof(pgdat_list));
		}
	}
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.
 */
static void __init initialize_pernode_data(void)
{
	int cpu, node;

	scatter_node_data();

#ifdef CONFIG_SMP
	/* Set the node_data pointer for each per-cpu struct */
	for_each_possible_early_cpu(cpu) {
		node = node_cpuid[cpu].nid;
		per_cpu(ia64_cpu_info, cpu).node_data =
			mem_data[node].node_data;
	}
#else
	{
		struct cpuinfo_ia64 *cpu0_cpu_info;
		cpu = 0;
		node = node_cpuid[cpu].nid;
		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
			((char *)&ia64_cpu_info - __per_cpu_start));
		cpu0_cpu_info->node_data = mem_data[node].node_data;
	}
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate pernode memory on the node
 *	closest to @nid according to the NUMA SLIT distances, falling back
 *	to any other memory-bearing node.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
	void *ptr = NULL;
	u8 best = 0xff;
	int bestnode = NUMA_NO_NODE, node, anynode = 0;

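	/*
	 * 0xff is the largest value a one-byte SLIT distance can encode, so
	 * the first memory-bearing node examined always becomes the initial
	 * candidate.
	 */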
	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;
		else if (node_distance(nid, node) < best) {
			best = node_distance(nid, node);
			bestnode = node;
		}
		anynode = node;
	}

	if (bestnode == NUMA_NO_NODE)
		bestnode = anynode;

	ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
				     __pa(MAX_DMA_ADDRESS),
				     MEMBLOCK_ALLOC_ACCESSIBLE,
				     bestnode);
	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n",
		      __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
		      __pa(MAX_DMA_ADDRESS));

	return ptr;
}

/**
 * memory_less_nodes - allocate and initialize pernode information for
 *	CPU-only nodes.
 */
static void __init memory_less_nodes(void)
{
	unsigned long pernodesize;
	void *pernode;
	int node;

	for_each_node_mask(node, memory_less_mask) {
		pernodesize = compute_pernodesize(node);
		pernode = memory_less_node_alloc(node, pernodesize);
		fill_pernode(node, __pa(pernode), pernodesize);
	}

	return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
	int node;

	reserve_memory();
	efi_memmap_walk(filter_memory, register_active_ranges);

	if (num_online_nodes() == 0) {
		printk(KERN_ERR "node info missing!\n");
		node_set_online(0);
	}

	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
	min_low_pfn = -1;
	max_low_pfn = 0;

	/* These actually end up getting called by call_pernode_memory() */
	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
	efi_memmap_walk(find_max_min_low_pfn, NULL);

	for_each_online_node(node)
		if (mem_data[node].min_pfn)
			node_clear(node, memory_less_mask);

	reserve_pernode_space();
	memory_less_nodes();
	initialize_pernode_data();

	max_pfn = max_low_pfn;

	find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already, we just need to set
 * local_per_cpu_offset
 */
void *per_cpu_init(void)
{
	int cpu;
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		for_each_possible_early_cpu(cpu)
			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
	}

	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
	unsigned long rs, re, end = start + len;
	void (*func)(unsigned long, unsigned long, int);
	int i;

	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;
	if (start >= end)
		return;

	func = arg;

	if (!num_node_memblks) {
		/* No SRAT table, so assume one node (node 0) */
		if (start < end)
			(*func)(start, end - start, 0);
		return;
	}

	for (i = 0; i < num_node_memblks; i++) {
		rs = max(start, node_memblk[i].start_paddr);
		re = min(end, node_memblk[i].start_paddr +
			 node_memblk[i].size);

		if (rs < re)
			(*func)(rs, re - rs, node_memblk[i].nid);

		if (re == end)
			break;
	}
}

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
	unsigned long max_dma;
	unsigned long max_zone_pfns[MAX_NR_ZONES];

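	/*
	 * Highest page frame reachable by legacy 32-bit DMA; pages below it
	 * go to ZONE_DMA32, the rest to ZONE_NORMAL.
	 */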
	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

	sparse_init();

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA32] = max_dma;
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
	free_area_init(max_zone_pfns);

	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

pg_data_t * __init arch_alloc_nodedata(int nid)
{
	unsigned long size = compute_pernodesize(nid);

	return memblock_alloc(size, SMP_CACHE_BYTES);
}

void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
	pgdat_list[update_node] = update_pgdat;
	scatter_node_data();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
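/*
 * The vmemmap is backed with base pages only on this architecture, and
 * vmemmap_free() is left empty, presumably leaving the memmap of
 * hot-removed sections in place.
 */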
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}

void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
}
#endif