Signed-off-by: Dave Hansen --- memhotplug-dave/include/linux/memory.h | 1 memhotplug-dave/mm/memory_hotplug.c | 11 ++++++ memhotplug-dave/mm/page_alloc.c | 56 +++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 12 deletions(-) diff -puN include/linux/memory.h~Y0-empty-zone include/linux/memory.h --- memhotplug/include/linux/memory.h~Y0-empty-zone 2005-02-17 15:25:51.000000000 -0800 +++ memhotplug-dave/include/linux/memory.h 2005-02-17 15:25:51.000000000 -0800 @@ -58,6 +58,7 @@ extern int memory_dev_init(void); #define MEM_BROKEN (1<<4) extern int invalidate_phys_mapping(unsigned long, unsigned long); +extern int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages); struct notifier_block; extern int register_memory_notifier(struct notifier_block *nb); diff -puN mm/memory_hotplug.c~Y0-empty-zone mm/memory_hotplug.c --- memhotplug/mm/memory_hotplug.c~Y0-empty-zone 2005-02-17 15:25:51.000000000 -0800 +++ memhotplug-dave/mm/memory_hotplug.c 2005-02-17 15:25:51.000000000 -0800 @@ -80,6 +80,17 @@ int __add_section(struct zone *zone, uns get_order(sizeof(struct page) * nr_pages)); } + if (zone->zone_start_pfn > phys_start_pfn) { + zone->spanned_pages += zone->zone_start_pfn - phys_start_pfn; + zone->zone_start_pfn = phys_start_pfn; + } + if (phys_start_pfn + nr_pages > zone->zone_start_pfn + zone->spanned_pages) { + zone->spanned_pages = (phys_start_pfn + nr_pages) - + zone->zone_start_pfn; + } + + hot_add_zone_init(zone, phys_start_pfn, PAGES_PER_SECTION); + up(&zone->resize_sem); if (ret < 0) { diff -puN mm/page_alloc.c~Y0-empty-zone mm/page_alloc.c --- memhotplug/mm/page_alloc.c~Y0-empty-zone 2005-02-17 15:25:51.000000000 -0800 +++ memhotplug-dave/mm/page_alloc.c 2005-02-17 15:25:51.000000000 -0800 @@ -1296,7 +1296,7 @@ void show_free_areas(void) /* * Builds allocation fallback zone lists. 
*/ -static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k) +int __devinit build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k) { switch (k) { struct zone *zone; @@ -1304,7 +1304,12 @@ static int __init build_zonelists_node(p BUG(); case ZONE_HIGHMEM: zone = pgdat->node_zones + ZONE_HIGHMEM; - if (zone->present_pages) { + /* + * with mem hotplug we don't increment present_pages + * until the pages are actually freed into the zone, + * but we increment spanned pages much earlier + */ + if (zone->spanned_pages) { #ifndef CONFIG_HIGHMEM BUG(); #endif @@ -1312,11 +1317,11 @@ static int __init build_zonelists_node(p } case ZONE_NORMAL: zone = pgdat->node_zones + ZONE_NORMAL; - if (zone->present_pages) + if (zone->spanned_pages) zonelist->zones[j++] = zone; case ZONE_DMA: zone = pgdat->node_zones + ZONE_DMA; - if (zone->present_pages) + if (zone->spanned_pages) zonelist->zones[j++] = zone; } @@ -1387,7 +1392,7 @@ static int __init find_next_best_node(in return best_node; } -static void __init build_zonelists(pg_data_t *pgdat) +void __devinit build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; int prev_node, load; @@ -1434,7 +1439,7 @@ static void __init build_zonelists(pg_da #else /* CONFIG_NUMA */ -static void __init build_zonelists(pg_data_t *pgdat) +void __devinit build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; @@ -1613,7 +1618,7 @@ void zonetable_add(struct zone *zone, in memmap_init_zone((size), (nid), (zone), (start_pfn)) #endif -static __devinit void zone_pcp_init(struct zone *zone) +void zone_pcp_init(struct zone *zone) { unsigned long batch; int cpu; @@ -1653,7 +1658,7 @@ static __devinit void zone_pcp_init(stru zone->name, zone->present_pages, batch); } -static __devinit void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) +void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { int table_size_bytes; int i; @@ 
-1679,7 +1684,6 @@ static void init_currently_empty_zone(st
 {
 	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
 	struct pglist_data *pgdat = zone->zone_pgdat;
-	int nid = pgdat->node_id;
 
 	zone->zone_mem_map = pfn_to_page(zone_start_pfn);
 	zone->zone_start_pfn = zone_start_pfn;
@@ -1687,9 +1691,8 @@ static void init_currently_empty_zone(st
 	if ((zone_start_pfn) & (zone_required_alignment-1))
 		printk("BUG: wrong zone alignment, it will crash\n");
 
-	memmap_init(size, nid, zone_idx(zone), zone_start_pfn);
-
 	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+	zone->spanned_pages = size;
 	pgdat->nr_zones++;
 }
 
@@ -1723,7 +1726,6 @@ static void __init free_area_init_core(s
 		nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
-		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -1749,6 +1751,7 @@ static void __init free_area_init_core(s
 
 		zone_wait_table_init(zone, size);
 		init_currently_empty_zone(zone, zone_start_pfn, size);
+		//memmap_init(size, nid, zone_idx(zone), zone_start_pfn);
 		zone_start_pfn += size;
 	}
 }
@@ -2242,3 +2245,32 @@ void *__init alloc_large_system_hash(con
 
 	return table;
 }
+
+/* A non-zero wait_table_size is taken as the sign of an initialized zone. */
+static inline int zone_previously_initialized(struct zone *zone)
+{
+	return zone->wait_table_size != 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * One-time setup of a not-yet-initialized zone covering size_pages pages
+ * from phys_start_pfn.  Returns 0, or -EEXIST if it was already set up.
+ */
+int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages)
+{
+	if (zone_previously_initialized(zone))
+		return -EEXIST;
+
+	zone_wait_table_init(zone, size_pages);
+	init_currently_empty_zone(zone, phys_start_pfn, size_pages);
+	zone_pcp_init(zone);
+	/*
+	 * FIXME: there is no locking at all for the zonelists.
+	 * Least impactful (codewise) way to do this is probably
+	 * to freeze all the CPUs for a sec while this is done.
+	 */
+	build_zonelists(zone->zone_pgdat);
+	return 0;
+}
+#endif
_