
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug1-dave/include/linux/memory.h |    1 
 memhotplug1-dave/mm/memory_hotplug.c    |   11 +++++
 memhotplug1-dave/mm/page_alloc.c        |   63 +++++++++++++++++++++++++-------
 3 files changed, 63 insertions(+), 12 deletions(-)

diff -puN mm/page_alloc.c~empty-zone mm/page_alloc.c
--- memhotplug1/mm/page_alloc.c~empty-zone	2004-11-04 16:46:41.000000000 -0800
+++ memhotplug1-dave/mm/page_alloc.c	2004-11-04 16:46:41.000000000 -0800
@@ -1562,7 +1562,7 @@ void show_free_areas(void)
 /*
  * Builds allocation fallback zone lists.
  */
-static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
+int __devinit build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
 {
 	switch (k) {
 		struct zone *zone;
@@ -1570,7 +1570,12 @@ static int __init build_zonelists_node(p
 		BUG();
 	case ZONE_HIGHMEM:
 		zone = pgdat->node_zones + ZONE_HIGHMEM;
-		if (zone->present_pages) {
+		/*
+		 * With memory hotplug, present_pages is not incremented
+		 * until the pages are actually freed into the zone, but
+		 * spanned_pages is incremented much earlier.
+		 */
+		if (zone->spanned_pages) {
 #ifndef CONFIG_HIGHMEM
 			BUG();
 #endif
@@ -1578,11 +1583,11 @@ static int __init build_zonelists_node(p
 		}
 	case ZONE_NORMAL:
 		zone = pgdat->node_zones + ZONE_NORMAL;
-		if (zone->present_pages)
+		if (zone->spanned_pages)
 			zonelist->zones[j++] = zone;
 	case ZONE_DMA:
 		zone = pgdat->node_zones + ZONE_DMA;
-		if (zone->present_pages)
+		if (zone->spanned_pages)
 			zonelist->zones[j++] = zone;
 	}
 
@@ -1647,7 +1652,7 @@ static int __init find_next_best_node(in
 	return best_node;
 }
 
-static void __init build_zonelists(pg_data_t *pgdat)
+void __devinit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 	int prev_node, load;
@@ -1695,7 +1700,7 @@ static void __init build_zonelists(pg_da
 
 #else	/* CONFIG_NUMA */
 
-static void __init build_zonelists(pg_data_t *pgdat)
+void __devinit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 
@@ -1847,7 +1852,7 @@ void zone_init_free_lists(struct pglist_
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif
 
-static __devinit void zone_pcp_init(struct zone *zone)
+void zone_pcp_init(struct zone *zone)
 {
 	unsigned long batch;
 	int cpu;
@@ -1885,7 +1890,7 @@ static __devinit void zone_pcp_init(stru
 	}
 }
 
-static __devinit void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int table_size_bytes;
 	int i;
@@ -1912,7 +1917,6 @@ static void init_currently_empty_zone(st
 {
 	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
 	struct pglist_data *pgdat = zone->zone_pgdat;
-	int nid = pgdat->node_id;
 
 #ifndef CONFIG_NONLINEAR
 	/* most uses of zone->zone_mem_map can be removed */
@@ -1923,9 +1927,8 @@ static void init_currently_empty_zone(st
 	if ((zone_start_pfn) & (zone_required_alignment-1))
 		printk("BUG: wrong zone alignment, it will crash\n");
 
-	memmap_init(size, nid, zone_idx(zone), zone_start_pfn);
-
 	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+	zone->spanned_pages = size;
 
 	smp_mb();
 	pgdat->nr_zones++;
@@ -1961,7 +1964,6 @@ static void __init free_area_init_core(s
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
-		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -1980,8 +1982,10 @@ static void __init free_area_init_core(s
 		zone->nr_inactive = 0;
 		if (!size)
 			continue;
+
 		zone_wait_table_init(zone, size);
 		init_currently_empty_zone(zone, zone_start_pfn, size);
+		memmap_init(size, nid, zone_idx(zone), zone_start_pfn);
 		zone_start_pfn += size;
 	}
 }
@@ -2497,3 +2501,38 @@ void *__init alloc_large_system_hash(con
 	return table;
 }
 
+#ifdef CONFIG_NONLINEAR
+int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages)
+{
+	int last_list_entry;
+	extern void build_zonelists(pg_data_t *pgdat);
+
+	/* zero wait_table_size is taken to mean the zone was never set up; could probably use a helper for this test */
+	if (!zone->wait_table_size) {
+		zone_wait_table_init(zone, PAGES_PER_SECTION);
+		init_currently_empty_zone(zone, phys_start_pfn, PAGES_PER_SECTION);
+		zone_pcp_init(zone);
+
+		/*
+		 * FIXME: there is no locking at all for the zonelists.
+		 * The least invasive (code-wise) fix is probably to
+		 * freeze all the CPUs briefly while this is done.
+		 *
+		 * Also, only node 0's ZONE_HIGHMEM zonelist is rebuilt, so
+		 * this only works when adding a highmem zone.  That really
+		 * only matters for 64-bit NUMA systems adding a new
+		 * ZONE_DMA, but they'll have bigger problems.
+		 */
+		last_list_entry = build_zonelists_node(NODE_DATA(0),
+					&NODE_DATA(0)->node_zonelists[ZONE_HIGHMEM],
+					0, ZONE_HIGHMEM);
+		NODE_DATA(0)->node_zonelists[ZONE_HIGHMEM].zones[last_list_entry] = NULL;
+	}
+	return 0;
+}
+#else
+int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages)
+{
+	return 0;
+}
+#endif
diff -puN mm/memory_hotplug.c~empty-zone mm/memory_hotplug.c
--- memhotplug1/mm/memory_hotplug.c~empty-zone	2004-11-04 16:46:41.000000000 -0800
+++ memhotplug1-dave/mm/memory_hotplug.c	2004-11-04 16:46:41.000000000 -0800
@@ -68,6 +68,17 @@ int __add_section(struct zone *zone, uns
 	}
 	alloc_memsections(phys_start_pfn, phys_start_pfn, nr_pages);
 
+	if (zone->zone_start_pfn > phys_start_pfn) {
+		zone->spanned_pages += zone->zone_start_pfn - phys_start_pfn;
+		zone->zone_start_pfn = phys_start_pfn;
+	}
+	if (phys_start_pfn + nr_pages > zone->zone_start_pfn + zone->spanned_pages) {
+		zone->spanned_pages = (phys_start_pfn + nr_pages) -
+					zone->zone_start_pfn;
+	}
+
+	hot_add_zone_init(zone, phys_start_pfn, PAGES_PER_SECTION);
+
 	up(&zone->resize_sem);
 
 	memmap = __kmalloc_section_memmap(nr_pages);
diff -puN include/linux/memory.h~empty-zone include/linux/memory.h
--- memhotplug1/include/linux/memory.h~empty-zone	2004-11-04 16:46:41.000000000 -0800
+++ memhotplug1-dave/include/linux/memory.h	2004-11-04 16:46:41.000000000 -0800
@@ -58,6 +58,7 @@ extern int register_memory(struct memory
 extern int register_new_memory(unsigned int section);
 extern int unregister_memory_section(unsigned int phys_section_nr);
 extern int invalidate_phys_mapping(unsigned long pfn, unsigned long nr_pages);
+extern int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages);
 struct notifier_block;
 
 extern int register_memory_notifier(struct notifier_block *nb);
_
