
If a zone is empty at boot-time and memory is hot-added to it
later, the zone needs to run the same init code that would have
been run on it at boot.

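This patch breaks the zone wait-table and per-cpu-pages setup, plus the
rest of the per-zone init (init_currently_empty_zone()), out into their own
functions for use by the hotplug code.  The "LIFO batch" debug printk is
dropped along the way.  You can almost see all of the
free_area_init_core() function on one page now. :)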

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/mm/page_alloc.c |  152 ++++++++++++++++++++++------------------
 1 files changed, 87 insertions(+), 65 deletions(-)

diff -puN mm/page_alloc.c~A8-pcp_zone_init mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~A8-pcp_zone_init	2004-12-10 13:52:29.000000000 -0800
+++ memhotplug-dave/mm/page_alloc.c	2004-12-10 13:52:29.000000000 -0800
@@ -1625,6 +1625,87 @@ void zone_init_free_lists(struct pglist_
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif
 
+static __devinit void zone_pcp_init(struct zone *zone)
+{
+	unsigned long batch = zone->present_pages / 1024;
+	int cpu;
+
+	/*
+	 * Aim each per-cpu page pool at about a thousandth of the zone,
+	 * but cap it at a quarter of a megabyte: growing past the L2
+	 * cache size is pointless.  The actual cache size is not known
+	 * here, so that cap is only a guess.
+	 */
+	if (batch * PAGE_SIZE > 256 * 1024)
+		batch = (256 * 1024) / PAGE_SIZE;
+	batch /= 4;		/* effectively multiplied by 4 again below */
+	if (!batch)
+		batch = 1;
+
+	/* Reset the hot and cold pools in all NR_CPUS pageset slots. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct per_cpu_pages *hot = &zone->pageset[cpu].pcp[0];
+		struct per_cpu_pages *cold = &zone->pageset[cpu].pcp[1];
+
+		/* hot pool: low = 2 batches, high = 6 batches */
+		INIT_LIST_HEAD(&hot->list);
+		hot->count = 0;
+		hot->batch = batch;
+		hot->low = 2 * batch;
+		hot->high = 6 * batch;
+
+		/* cold pool: low = 0, high = 2 batches */
+		INIT_LIST_HEAD(&cold->list);
+		cold->count = 0;
+		cold->batch = batch;
+		cold->low = 0;
+		cold->high = 2 * batch;
+	}
+}
+
+static __devinit void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+{
+	unsigned long table_size_bytes;
+	unsigned long i;
+	/*
+	 * The per-page waitqueue mechanism uses hashed waitqueues per zone;
+	 * the table is sized from the zone_size_pages passed in.
+	 */
+	zone->wait_table_size = wait_table_size(zone_size_pages);
+	zone->wait_table_bits = wait_table_bits(zone->wait_table_size);
+	table_size_bytes = zone->wait_table_size * sizeof(wait_queue_head_t);
+	/* before SYSTEM_RUNNING use the bootmem allocator, afterwards kmalloc */
+	if (system_state < SYSTEM_RUNNING)
+		zone->wait_table = alloc_bootmem_node(zone->zone_pgdat,
+						      table_size_bytes);
+	else
+		zone->wait_table = kmalloc(table_size_bytes, GFP_KERNEL);
+	/* No error return here, and the table is written to just below. */
+	BUG_ON(!zone->wait_table);
+	for (i = 0; i < zone->wait_table_size; ++i)
+		init_waitqueue_head(zone->wait_table + i);
+}
+
+/* Give a not-yet-populated zone its start pfn and mem_map, initialise its
+ * memmap and free lists, then account for it in pgdat->nr_zones. */
+static void init_currently_empty_zone(struct zone *zone, unsigned long zone_start_pfn, unsigned long size)
+{
+	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nid = pgdat->node_id;
+
+	zone->zone_mem_map = pfn_to_page(zone_start_pfn);
+	zone->zone_start_pfn = zone_start_pfn;
+	if ((zone_start_pfn) & (zone_required_alignment-1))
+		printk("BUG: wrong zone alignment, it will crash\n");
+	memmap_init(size, nid, zone_idx(zone), zone_start_pfn);
+	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+	smp_mb();	/* NOTE(review): presumably orders the setup before nr_zones */
+	/* nr_zones is highest populated index + 1 (old code: j+1), not a count */
+	if (pgdat->nr_zones < zone_idx(zone) + 1)
+		pgdat->nr_zones = zone_idx(zone) + 1;
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -1634,9 +1715,8 @@ void zone_init_free_lists(struct pglist_
 static void __init free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
-	unsigned long i, j;
-	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
-	int cpu, nid = pgdat->node_id;
+	unsigned long j;
+	int nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
 	pgdat->nr_zones = 0;
@@ -1646,7 +1726,6 @@ static void __init free_area_init_core(s
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize;
-		unsigned long batch;
 
 		zone_table[NODEZONE(nid, j)] = zone;
 		realsize = size = zones_size[j];
@@ -1666,40 +1745,7 @@ static void __init free_area_init_core(s
 		zone->free_pages = 0;
 
 		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
-
-		/*
-		 * The per-cpu-pages pools are set to around 1000th of the
-		 * size of the zone.  But no more than 1/4 of a meg - there's
-		 * no point in going beyond the size of L2 cache.
-		 *
-		 * OK, so we don't know how big the cache is.  So guess.
-		 */
-		batch = zone->present_pages / 1024;
-		if (batch * PAGE_SIZE > 256 * 1024)
-			batch = (256 * 1024) / PAGE_SIZE;
-		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
-
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &zone->pageset[cpu].pcp[0];	/* hot */
-			pcp->count = 0;
-			pcp->low = 2 * batch;
-			pcp->high = 6 * batch;
-			pcp->batch = 1 * batch;
-			INIT_LIST_HEAD(&pcp->list);
-
-			pcp = &zone->pageset[cpu].pcp[1];	/* cold */
-			pcp->count = 0;
-			pcp->low = 0;
-			pcp->high = 2 * batch;
-			pcp->batch = 1 * batch;
-			INIT_LIST_HEAD(&pcp->list);
-		}
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-				zone_names[j], realsize, batch);
+		zone_pcp_init(zone);
 		INIT_LIST_HEAD(&zone->active_list);
 		INIT_LIST_HEAD(&zone->inactive_list);
 		zone->nr_scan_active = 0;
@@ -1708,37 +1754,13 @@ static void __init free_area_init_core(s
 		zone->nr_inactive = 0;
 		if (!size)
 			continue;
-
-		/*
-		 * The per-page waitqueue mechanism uses hashed waitqueues
-		 * per zone.
-		 */
-		zone->wait_table_size = wait_table_size(size);
-		zone->wait_table_bits =
-			wait_table_bits(zone->wait_table_size);
-		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node(pgdat, zone->wait_table_size
-						* sizeof(wait_queue_head_t));
-
-		for(i = 0; i < zone->wait_table_size; ++i)
-			init_waitqueue_head(zone->wait_table + i);
-
-		pgdat->nr_zones = j+1;
-
-		zone->zone_mem_map = pfn_to_page(zone_start_pfn);
-		zone->zone_start_pfn = zone_start_pfn;
-
-		if ((zone_start_pfn) & (zone_required_alignment-1))
-			printk("BUG: wrong zone alignment, it will crash\n");
-
-		memmap_init(size, nid, j, zone_start_pfn);
-
+		zone_wait_table_init(zone, size);
+		init_currently_empty_zone(zone, zone_start_pfn, size);
 		zone_start_pfn += size;
-
-		zone_init_free_lists(pgdat, zone, zone->spanned_pages);
 	}
 }
 
+
 static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long size;
_
