<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
So, this patch started out with me trying to avoid passing a
contiguous, node-specific mem_map into free_area_init_node() and its
cousins.  Instead, I relied on some calls to pfn_to_page().

This works fine and dandy when all you need is the pgdat-&gt;node_mem_map
to do pfn_to_page().  However, the architectures without NUMA/DISCONTIG
use the real, global mem_map[] instead of a node_mem_map in the
pfn_to_page() calculation.  So, I ended up effectively trying to
initialize mem_map from itself while it was NULL.  That was bad, and
caused some very pretty colors on someone's screen when he tested it.

So, I had to make sure to initialize the global mem_map[] before calling
into free_area_init_node().  Then, I realized how many architectures do
this on their own, and have comments like this:

        /* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
        mem_map = NODE_DATA(0)-&gt;node_mem_map;

Also, ppc64 has some interesting hacks^Wcode in this area to make up for
any empty-memory NUMA nodes, which I think can go away now.

The following patch does what my first one did (don't pass mem_map into
the init functions), incorporates Jesse Barnes' ia64 fixes on top of
that, and gets rid of all but one of the per-architecture global mem_map
initializations (parisc is weird).  It also magically removes more code
than it adds.  It could be smaller, but I shamelessly added some
comments.

Signed-off-by: Dave Hansen &lt;haveblue@us.ibm.com&gt;
---

 taka-dave/arch/arm/mm/init.c           |    4 ----
 taka-dave/arch/arm26/mm/init.c         |    2 --
 taka-dave/arch/cris/arch-v10/mm/init.c |    1 -
 taka-dave/arch/i386/mm/discontig.c     |    9 +++------
 taka-dave/arch/ia64/mm/contig.c        |    5 ++---
 taka-dave/arch/ia64/mm/init.c          |    2 +-
 taka-dave/arch/ppc64/mm/init.c         |    1 -
 taka-dave/arch/sh/mm/init.c            |    2 --
 taka-dave/arch/sh64/mm/init.c          |    3 ---
 taka-dave/arch/sparc/mm/srmmu.c        |    1 -
 taka-dave/arch/sparc/mm/sun4c.c        |    1 -
 taka-dave/arch/sparc64/mm/init.c       |    1 -
 taka-dave/arch/v850/kernel/setup.c     |    1 -
 taka-dave/mm/page_alloc.c              |   18 ++++++++++++++----
 14 files changed, 20 insertions(+), 31 deletions(-)

diff -puN arch/arm/mm/init.c~A6-no_arch_mem_map_init arch/arm/mm/init.c
--- taka/arch/arm/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/arm/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -499,10 +499,6 @@ void __init paging_init(struct meminfo *
 				bdata-&gt;node_boot_start &gt;&gt; PAGE_SHIFT, zhole_size);
 	}
 
-#ifndef CONFIG_DISCONTIGMEM
-	mem_map = contig_page_data.node_mem_map;
-#endif
-
 	/*
 	 * finish off the bad pages once
 	 * the mem_map is initialised
diff -puN arch/arm26/mm/init.c~A6-no_arch_mem_map_init arch/arm26/mm/init.c
--- taka/arch/arm26/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/arm26/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -309,8 +309,6 @@ void __init paging_init(struct meminfo *
 	free_area_init_node(0, pgdat, zone_size,
 			bdata-&gt;node_boot_start &gt;&gt; PAGE_SHIFT, zhole_size);
 
-	mem_map = NODE_DATA(0)-&gt;node_mem_map;
-
 	/*
 	 * finish off the bad pages once
 	 * the mem_map is initialised
diff -puN arch/cris/arch-v10/mm/init.c~A6-no_arch_mem_map_init arch/cris/arch-v10/mm/init.c
--- taka/arch/cris/arch-v10/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/cris/arch-v10/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -184,7 +184,6 @@ paging_init(void)
 	 */
 
 	free_area_init_node(0, &amp;contig_page_data, zones_size, PAGE_OFFSET &gt;&gt; PAGE_SHIFT, 0);
-	mem_map = contig_page_data.node_mem_map;
 }
 
 /* Initialize remaps of some I/O-ports. It is important that this
diff -puN arch/i386/mm/discontig.c~A6-no_arch_mem_map_init arch/i386/mm/discontig.c
--- taka/arch/i386/mm/discontig.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/i386/mm/discontig.c	2004-09-27 13:49:13.000000000 -0700
@@ -417,18 +417,15 @@ void __init zone_sizes_init(void)
 		 * normal bootmem allocator, but other nodes come from the
 		 * remapped KVA area - mbligh
 		 */
-		if (!nid)
-			free_area_init_node(nid, NODE_DATA(nid),
-					zones_size, start, zholes_size);
-		else {
+		if (nid) {
 			unsigned long lmem_map;
 			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
 			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
 			lmem_map &amp;= PAGE_MASK;
 			NODE_DATA(nid)-&gt;node_mem_map = (struct page *)lmem_map;
-			free_area_init_node(nid, NODE_DATA(nid), zones_size,
-				start, zholes_size);
 		}
+		free_area_init_node(nid, NODE_DATA(nid), zones_size,
+				    start, zholes_size);
 	}
 	return;
 }
diff -puN arch/ia64/mm/contig.c~A6-no_arch_mem_map_init arch/ia64/mm/contig.c
--- taka/arch/ia64/mm/contig.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/ia64/mm/contig.c	2004-09-27 13:49:13.000000000 -0700
@@ -269,7 +269,7 @@ paging_init (void)
 		vmem_map = (struct page *) 0;
 		free_area_init_node(0, &amp;contig_page_data, zones_size, 0,
 				    zholes_size);
-		vmem_map = mem_map = contig_page_data.node_mem_map;
+		vmem_map = contig_page_data.node_mem_map;
 	} else {
 		unsigned long map_size;
 
@@ -280,11 +280,10 @@ paging_init (void)
 		vmem_map = (struct page *) vmalloc_end;
 		efi_memmap_walk(create_mem_map_page_table, 0);
 
-		contig_page_data.node_mem_map = vmem_map;
+		NODE_DATA(0)-&gt;node_mem_map = vmem_map;
 		free_area_init_node(0, &amp;contig_page_data, zones_size,
 				    0, zholes_size);
 
-		mem_map = contig_page_data.node_mem_map;
 		printk("Virtual mem_map starts at 0x%p\n", mem_map);
 	}
 #else /* !CONFIG_VIRTUAL_MEM_MAP */
diff -puN arch/ia64/mm/discontig.c~A6-no_arch_mem_map_init arch/ia64/mm/discontig.c
diff -puN arch/ia64/mm/init.c~A6-no_arch_mem_map_init arch/ia64/mm/init.c
--- taka/arch/ia64/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/ia64/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -429,7 +429,7 @@ virtual_memmap_init (u64 start, u64 end,
 		    / sizeof(struct page));
 
 	if (map_start &lt; map_end)
-		memmap_init_zone((unsigned long)(map_end - map_start),
+		memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
 				 args-&gt;nid, args-&gt;zone, page_to_pfn(map_start));
 	return 0;
 }
diff -puN arch/ppc64/mm/init.c~A6-no_arch_mem_map_init arch/ppc64/mm/init.c
--- taka/arch/ppc64/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/ppc64/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -662,7 +662,6 @@ void __init paging_init(void)
 
 	free_area_init_node(0, &amp;contig_page_data, zones_size,
 			    __pa(PAGE_OFFSET) &gt;&gt; PAGE_SHIFT, zholes_size);
-	mem_map = contig_page_data.node_mem_map;
 }
 #endif /* CONFIG_DISCONTIGMEM */
 
diff -puN arch/sh/mm/init.c~A6-no_arch_mem_map_init arch/sh/mm/init.c
--- taka/arch/sh/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/sh/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -216,8 +216,6 @@ void __init paging_init(void)
 #endif
 	NODE_DATA(0)-&gt;node_mem_map = NULL;
 	free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START &gt;&gt; PAGE_SHIFT, 0);
-	/* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
-	mem_map = NODE_DATA(0)-&gt;node_mem_map;
 
 #ifdef CONFIG_DISCONTIGMEM
 	/*
diff -puN arch/sh64/mm/init.c~A6-no_arch_mem_map_init arch/sh64/mm/init.c
--- taka/arch/sh64/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/sh64/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -124,9 +124,6 @@ void __init paging_init(void)
 	zones_size[ZONE_DMA] = MAX_LOW_PFN - START_PFN;
 	NODE_DATA(0)-&gt;node_mem_map = NULL;
 	free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START &gt;&gt; PAGE_SHIFT, 0);
-
-	/* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
-	mem_map = NODE_DATA(0)-&gt;node_mem_map;
 }
 
 void __init mem_init(void)
diff -puN arch/sparc/mm/srmmu.c~A6-no_arch_mem_map_init arch/sparc/mm/srmmu.c
--- taka/arch/sparc/mm/srmmu.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/sparc/mm/srmmu.c	2004-09-27 13:49:13.000000000 -0700
@@ -1343,7 +1343,6 @@ void __init srmmu_paging_init(void)
 
 		free_area_init_node(0, &amp;contig_page_data, zones_size,
 				    pfn_base, zholes_size);
-		mem_map = contig_page_data.node_mem_map;
 	}
 }
 
diff -puN arch/sparc/mm/sun4c.c~A6-no_arch_mem_map_init arch/sparc/mm/sun4c.c
--- taka/arch/sparc/mm/sun4c.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/sparc/mm/sun4c.c	2004-09-27 13:49:13.000000000 -0700
@@ -2116,7 +2116,6 @@ void __init sun4c_paging_init(void)
 
 		free_area_init_node(0, &amp;contig_page_data, zones_size,
 				    pfn_base, zholes_size);
-		mem_map = contig_page_data.node_mem_map;
 	}
 
 	cnt = 0;
diff -puN arch/sparc64/mm/init.c~A6-no_arch_mem_map_init arch/sparc64/mm/init.c
--- taka/arch/sparc64/mm/init.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/sparc64/mm/init.c	2004-09-27 13:49:13.000000000 -0700
@@ -1511,7 +1511,6 @@ void __init paging_init(void)
 
 		free_area_init_node(0, &amp;contig_page_data, zones_size,
 				    phys_base &gt;&gt; PAGE_SHIFT, zholes_size);
-		mem_map = contig_page_data.node_mem_map;
 	}
 
 	device_scan();
diff -puN arch/v850/kernel/setup.c~A6-no_arch_mem_map_init arch/v850/kernel/setup.c
--- taka/arch/v850/kernel/setup.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/arch/v850/kernel/setup.c	2004-09-27 13:49:13.000000000 -0700
@@ -283,5 +283,4 @@ init_mem_alloc (unsigned long ram_start,
 	NODE_DATA(0)-&gt;node_mem_map = NULL;
 	free_area_init_node (0, NODE_DATA(0), zones_size,
 			     ADDR_TO_PAGE (PAGE_OFFSET), 0);
-	mem_map = NODE_DATA(0)-&gt;node_mem_map;
 }
diff -puN mm/page_alloc.c~A6-no_arch_mem_map_init mm/page_alloc.c
--- taka/mm/page_alloc.c~A6-no_arch_mem_map_init	2004-09-27 13:49:13.000000000 -0700
+++ taka-dave/mm/page_alloc.c	2004-09-27 13:49:13.000000000 -0700
@@ -1691,14 +1691,25 @@ static void __init free_area_init_core(s
 	}
 }
 
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long size;
 
+	/*
+	 * Make sure that the architecture hasn't already allocated
+	 * a node_mem_map, and that the node contains memory.
+	 */
+	if (pgdat-&gt;node_mem_map || !pgdat-&gt;node_spanned_pages)
+		return;
+
 	size = (pgdat-&gt;node_spanned_pages + 1) * sizeof(struct page);
 	pgdat-&gt;node_mem_map = alloc_bootmem_node(pgdat, size);
 #ifndef CONFIG_DISCONTIGMEM
-	mem_map = contig_page_data.node_mem_map;
+	/*
+	 * With no DISCONTIG, the global mem_map is just set as node 0's
+	 */
+	if (pgdat == NODE_DATA(0))
+		mem_map = NODE_DATA(0)-&gt;node_mem_map;
 #endif
 }
 
@@ -1710,8 +1721,7 @@ void __init free_area_init_node(int nid,
 	pgdat-&gt;node_start_pfn = node_start_pfn;
 	calculate_zone_totalpages(pgdat, zones_size, zholes_size);
 
-	if (!pfn_to_page(node_start_pfn))
-		node_alloc_mem_map(pgdat);
+	alloc_node_mem_map(pgdat);
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
_
</pre></body></html>