
Introduce a simple allocator for the NUMA remap space.  This space is
very scarce and is used for structures that are best allocated node-local.

Issues:
o alloc_remap takes a node_id where we might expect a pgdat.  This was intended
  to allow us to allocate the pgdats themselves using this mechanism, which we
  do not yet do.  We should probably have alloc_remap_node() and
  alloc_remap_nid() for this purpose.

Matt's for_each_..._node() patches may have broken this.  Go back and
check those loops.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/arch/i386/Kconfig         |    5 +++
 memhotplug-dave/arch/i386/mm/discontig.c  |   43 ++++++++++++++++++------------
 memhotplug-dave/include/asm-i386/mmzone.h |    1 
 memhotplug-dave/include/linux/bootmem.h   |    9 ++++++
 memhotplug-dave/mm/page_alloc.c           |    8 ++++-
 5 files changed, 49 insertions(+), 17 deletions(-)

diff -puN arch/i386/Kconfig~B1-alloc_remap-i386 arch/i386/Kconfig
--- memhotplug/arch/i386/Kconfig~B1-alloc_remap-i386	2005-01-04 13:49:29.000000000 -0800
+++ memhotplug-dave/arch/i386/Kconfig	2005-01-04 13:49:29.000000000 -0800
@@ -769,6 +769,11 @@ config HAVE_ARCH_BOOTMEM_NODE
 	depends on NUMA
 	default y
 
+config HAVE_ARCH_ALLOC_REMAP
+	bool
+	depends on NUMA
+	default y
+
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
 	depends on HIGHMEM4G || HIGHMEM64G
diff -puN arch/i386/mm/discontig.c~B1-alloc_remap-i386 arch/i386/mm/discontig.c
--- memhotplug/arch/i386/mm/discontig.c~B1-alloc_remap-i386	2005-01-04 13:49:29.000000000 -0800
+++ memhotplug-dave/arch/i386/mm/discontig.c	2005-01-04 13:49:29.000000000 -0800
@@ -83,6 +83,9 @@ unsigned long node_remap_offset[MAX_NUMN
 void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
+void *node_remap_end_vaddr[MAX_NUMNODES];
+void *node_remap_alloc_vaddr[MAX_NUMNODES];
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
@@ -137,6 +140,20 @@ static void __init allocate_pgdat(int ni
 	}
 }
 
+void *alloc_remap(int nid, unsigned long size)
+{
+	void *allocation = node_remap_alloc_vaddr[nid];
+
+	size = ALIGN(size, L1_CACHE_BYTES);
+	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+		return NULL;	/* nid has no remap area, or no room left */
+
+	node_remap_alloc_vaddr[nid] += size;	/* bump the per-node cursor */
+	memset(allocation, 0, size);
+
+	return allocation;
+}
+
 void __init remap_numa_kva(void)
 {
 	void *vaddr;
@@ -173,8 +190,8 @@ static unsigned long calculate_numa_rema
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
-		reserve_pages += size;
 		node_remap_offset[nid] = reserve_pages;
+		reserve_pages += size;
 		printk("Shrinking node %d from %ld pages to %ld pages\n",
 			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
 		node_end_pfn[nid] -= size;
@@ -257,12 +274,18 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
+			highstart_pfn + node_remap_offset[nid]);
+		/* Init the node remap allocator */
+		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+			(node_remap_size[nid] * PAGE_SIZE);
+		node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
-			    - node_remap_offset[nid] + node_remap_size[nid]));
+			(ulong) pfn_to_kaddr(highstart_pfn
+			    + node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
@@ -324,18 +347,6 @@ void __init zone_sizes_init(void)
 			}
 		}
 		zholes_size = get_zholes_size(nid);
-		/*
-		 * We let the lmem_map for node 0 be allocated from the
-		 * normal bootmem allocator, but other nodes come from the
-		 * remapped KVA area - mbligh
-		 */
-		if (nid) {
-			unsigned long lmem_map;
-			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
-			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
-			lmem_map &= PAGE_MASK;
-			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
-		}
 		free_area_init_node(nid, NODE_DATA(nid), zones_size,
 				    start, zholes_size);
 	}
diff -puN include/asm-i386/mmzone.h~B1-alloc_remap-i386 include/asm-i386/mmzone.h
--- memhotplug/include/asm-i386/mmzone.h~B1-alloc_remap-i386	2005-01-04 13:49:29.000000000 -0800
+++ memhotplug-dave/include/asm-i386/mmzone.h	2005-01-04 13:49:29.000000000 -0800
@@ -16,6 +16,7 @@
 	#else	/* summit or generic arch */
 		#include <asm/srat.h>
 	#endif
+
 #else /* !CONFIG_NUMA */
 	#define get_memcfg_numa get_memcfg_numa_flat
 	#define get_zholes_size(n) (0)
diff -puN include/linux/bootmem.h~B1-alloc_remap-i386 include/linux/bootmem.h
--- memhotplug/include/linux/bootmem.h~B1-alloc_remap-i386	2005-01-04 13:49:29.000000000 -0800
+++ memhotplug-dave/include/linux/bootmem.h	2005-01-04 13:49:29.000000000 -0800
@@ -68,6 +68,15 @@ extern void * __init __alloc_bootmem_nod
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
+extern void *alloc_remap(int nid, unsigned long size);
+#else
+static inline void *alloc_remap(int nid, unsigned long size)
+{
+	return NULL;
+}
+#endif
+
 extern unsigned long __initdata nr_kernel_pages;
 extern unsigned long __initdata nr_all_pages;
 
diff -puN mm/page_alloc.c~B1-alloc_remap-i386 mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~B1-alloc_remap-i386	2005-01-04 13:49:29.000000000 -0800
+++ memhotplug-dave/mm/page_alloc.c	2005-01-04 13:49:29.000000000 -0800
@@ -1766,6 +1766,7 @@ static void __init free_area_init_core(s
 static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long size;
+	void *map;
 
 	/*
 	 * Make sure that the architecture hasn't already allocated
@@ -1775,7 +1776,12 @@ static void __init alloc_node_mem_map(st
 		return;
 
 	size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-	pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+
+	map = alloc_remap(pgdat->node_id, size);
+	if (!map)
+		map = alloc_bootmem_node(pgdat, size);
+	pgdat->node_mem_map = map;
+
 #ifndef CONFIG_DISCONTIGMEM
 	/*
 	 * With no DISCONTIG, the global mem_map is just set as node 0's
_
