memory_present() is how each arch/subarch will tell sparse where all of its memory is. This is what triggers sparse to go out and create its mappings for the memory, as well as allocate the mem_map[]. By: Andy Whitcroft Signed-off-by: Dave Hansen --- memhotplug-dave/arch/i386/kernel/numaq.c | 4 ++- memhotplug-dave/arch/i386/kernel/srat.c | 6 ++++ memhotplug-dave/arch/i386/mm/discontig.c | 38 ++++++++++++++++--------------- memhotplug-dave/arch/ppc64/mm/numa.c | 22 +++++++++++++---- 4 files changed, 45 insertions(+), 25 deletions(-) diff -puN arch/i386/kernel/numaq.c~B-sparse-140-abstract-discontig arch/i386/kernel/numaq.c --- memhotplug/arch/i386/kernel/numaq.c~B-sparse-140-abstract-discontig 2005-02-17 15:25:35.000000000 -0800 +++ memhotplug-dave/arch/i386/kernel/numaq.c 2005-02-17 15:25:35.000000000 -0800 @@ -32,7 +32,7 @@ #include /* These are needed before the pgdat's are created */ -extern long node_start_pfn[], node_end_pfn[]; +extern long node_start_pfn[], node_end_pfn[], node_remap_size[]; #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -59,6 +59,8 @@ static void __init smp_dump_qct(void) eq->hi_shrd_mem_start - eq->priv_mem_size); node_end_pfn[node] = MB_TO_PAGES( eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); + node_remap_size[node] += memory_present(node, + node_start_pfn[node], node_end_pfn[node]); } } } diff -puN arch/i386/kernel/srat.c~B-sparse-140-abstract-discontig arch/i386/kernel/srat.c --- memhotplug/arch/i386/kernel/srat.c~B-sparse-140-abstract-discontig 2005-02-17 15:25:35.000000000 -0800 +++ memhotplug-dave/arch/i386/kernel/srat.c 2005-02-17 15:25:35.000000000 -0800 @@ -58,7 +58,7 @@ static int num_memory_chunks; /* total static int zholes_size_init; static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; -extern unsigned long node_start_pfn[], node_end_pfn[]; +extern unsigned long node_start_pfn[], node_end_pfn[], node_remap_size[]; extern void * boot_ioremap(unsigned long, unsigned long); @@ -266,6 +266,10 @@ static int __init acpi20_parse_srat(stru j, node_memory_chunk[j].nid, node_memory_chunk[j].start_pfn, node_memory_chunk[j].end_pfn); + node_remap_size[node_memory_chunk[j].nid] += memory_present( + node_memory_chunk[j].nid, + node_memory_chunk[j].start_pfn, + node_memory_chunk[j].end_pfn); } /*calculate node_start_pfn/node_end_pfn arrays*/ diff -puN arch/i386/mm/discontig.c~B-sparse-140-abstract-discontig arch/i386/mm/discontig.c --- memhotplug/arch/i386/mm/discontig.c~B-sparse-140-abstract-discontig 2005-02-17 15:25:35.000000000 -0800 +++ memhotplug-dave/arch/i386/mm/discontig.c 2005-02-17 15:25:35.000000000 -0800 @@ -60,6 +60,23 @@ bootmem_data_t node0_bdata; */ s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +unsigned long memory_present(int nid, unsigned long start, unsigned long end) +{ + unsigned long pfn; + + printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n", + nid, start, end); + printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); + printk(KERN_DEBUG " "); + for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { + physnode_map[pfn / PAGES_PER_ELEMENT] = nid; + printk(KERN_DEBUG "%ld ", pfn); + } + printk(KERN_DEBUG "\n"); + + return (end - start + 1) * sizeof(struct page); +} + unsigned long node_start_pfn[MAX_NUMNODES]; unsigned long node_end_pfn[MAX_NUMNODES]; @@ -199,9 +216,9 @@ static unsigned long calculate_numa_rema if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; - /* calculate the size of the mem_map needed in bytes */ - size = (node_end_pfn[nid] - node_start_pfn[nid] + 1) - * sizeof(struct page) + sizeof(pg_data_t); + /* ensure the remap includes space for the pgdat. */ + size = node_remap_size[nid] + sizeof(pg_data_t); + /* convert size to large (pmd size) pages, rounding up */ size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ @@ -250,21 +267,6 @@ unsigned long __init setup_memory(void) */ get_memcfg_numa(); - /* Fill in the physnode_map */ - for_each_online_node(nid) { - printk("Node: %d, start_pfn: %ld, end_pfn: %ld\n", - nid, node_start_pfn[nid], node_end_pfn[nid]); - printk(" Setting physnode_map array to node %d for pfns:\n ", - nid); - for (pfn = node_start_pfn[nid]; pfn < node_end_pfn[nid]; - pfn += PAGES_PER_ELEMENT) { - physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk("%ld ", pfn); - } - printk("\n"); - } - - find_max_pfn(); reserve_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ diff -puN arch/ppc64/mm/numa.c~B-sparse-140-abstract-discontig arch/ppc64/mm/numa.c --- memhotplug/arch/ppc64/mm/numa.c~B-sparse-140-abstract-discontig 2005-02-17 15:25:35.000000000 -0800 +++ memhotplug-dave/arch/ppc64/mm/numa.c 2005-02-17 15:25:35.000000000 -0800 @@ -58,6 +58,20 @@ EXPORT_SYMBOL(numa_memory_lookup_table); EXPORT_SYMBOL(numa_cpumask_lookup_table); EXPORT_SYMBOL(nr_cpus_in_node); +unsigned long memory_present(int nid, unsigned long start, unsigned long end) +{ + unsigned long i; + + /* XXX/APW: fix the loop instead ... */ + start <<= PAGE_SHIFT; + end <<= PAGE_SHIFT; + + for (i = start ; i < end; i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = nid; + + return 0; +} + static inline void map_cpu_to_node(int cpu, int node) { numa_cpu_lookup_table[cpu] = node; @@ -378,9 +392,8 @@ new_range: size / PAGE_SIZE; } - for (i = start ; i < (start+size); i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = - numa_domain; + memory_present(numa_domain, start >> PAGE_SHIFT, + (start + size) >> PAGE_SHIFT); ranges--; if (ranges) @@ -428,8 +441,7 @@ static void __init setup_nonnuma(void) init_node_data[0].node_start_pfn = 0; init_node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE; - for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; + memory_present(0, 0, init_node_data[0].node_spanned_pages); node0_io_hole_size = top_of_ram - total_ram; } _