
Restore x86-64 support for memory hotplug on top of sparsemem.
This arch-specific patch adds the add_memory(), remove_memory() and
online_page() functions, together with the kernel page table
initialization changes needed to map new memory when it is added
at runtime.  Also, make sparsemem available to single-node memory
hotplug kernels by removing the strict dependency on CONFIG_NUMA.

Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 include/asm-x86_64/mman.h             |    0 
 memhotplug-dave/arch/x86_64/Kconfig   |    1 
 memhotplug-dave/arch/x86_64/mm/init.c |  193 ++++++++++++++++++++++++++++------
 3 files changed, 161 insertions(+), 33 deletions(-)

diff -puN arch/x86_64/Kconfig~L4-x86-64-memory-hotplug-no-sparse arch/x86_64/Kconfig
--- memhotplug/arch/x86_64/Kconfig~L4-x86-64-memory-hotplug-no-sparse	2005-04-13 14:21:13.000000000 -0700
+++ memhotplug-dave/arch/x86_64/Kconfig	2005-04-13 14:21:13.000000000 -0700
@@ -286,7 +286,6 @@ config ARCH_DISCONTIGMEM_DEFAULT
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on NUMA
 
 config ARCH_FLATMEM_DISABLE
 	def_bool y
diff -puN arch/x86_64/mm/init.c~L4-x86-64-memory-hotplug-no-sparse arch/x86_64/mm/init.c
--- memhotplug/arch/x86_64/mm/init.c~L4-x86-64-memory-hotplug-no-sparse	2005-04-13 14:21:13.000000000 -0700
+++ memhotplug-dave/arch/x86_64/mm/init.c	2005-04-13 14:21:13.000000000 -0700
@@ -22,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -179,13 +181,19 @@ static  struct temp_map { 
 	{}
 }; 
 
-static __init void *alloc_low_page(int *index, unsigned long *phys) 
+static __devinit void *alloc_low_page(int *index, unsigned long *phys)
 { 
 	struct temp_map *ti;
 	int i; 
 	unsigned long pfn = table_end++, paddr; 
 	void *adr;
 
+	if (after_bootmem) {
+		adr = (void *)get_zeroed_page(GFP_ATOMIC);
+		*phys = __pa(adr);
+		return adr;
+	}
+
 	if (pfn >= end_pfn) 
 		panic("alloc_low_page: ran out of memory"); 
 	for (i = 0; temp_mappings[i].allocated; i++) {
@@ -198,55 +206,95 @@ static __init void *alloc_low_page(int *
 	ti->allocated = 1; 
 	__flush_tlb(); 	       
 	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); 
+	memset(adr, 0, PAGE_SIZE);
 	*index = i; 
 	*phys  = pfn * PAGE_SIZE;  
 	return adr; 
 } 
 
-static __init void unmap_low_page(int i)
+static __devinit void unmap_low_page(int i)
 { 
-	struct temp_map *ti = &temp_mappings[i];
+	struct temp_map *ti;
+
+	if (after_bootmem)
+		return;
+	ti = &temp_mappings[i];
 	set_pmd(ti->pmd, __pmd(0));
 	ti->allocated = 0; 
 } 
 
-static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+
+static void __devinit
+phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+	int i;
+
+	printk("%s: pmd: 0x%p, address: 0x%lx end: 0x%lx\n",
+		__func__, pmd, address, end);
+	/* Map [address, end) in PMD_SIZE steps with _PAGE_PSE entries, starting at 'pmd'. */
+	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+		unsigned long entry;
+
+		if (address >= end) {	/* 'end' is exclusive, matching phys_pud_init() */
+			for (; i < PTRS_PER_PMD; i++, pmd++)
+				set_pmd(pmd, __pmd(0));	/* clear the entries past 'end' */
+			break;
+		}
+		entry = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | address;
+		entry &= __supported_pte_mask;
+		set_pmd(pmd, __pmd(entry));
+	}
+}
+
+
+static void __devinit
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+
+	printk("%s: addr: 0x%lx end: 0x%lx pmd: 0x%p\n",
+		__func__, address, end, pmd);
+
+	if (pmd_none(*pmd)) {
+		spin_lock(&init_mm.page_table_lock);
+		phys_pmd_init(pmd, address, end);
+		spin_unlock(&init_mm.page_table_lock);
+		__flush_tlb_all();
+	}
+}
+
+
+
+static void __devinit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
 { 
-	long i, j; 
+	long i = pud_index(address);
 
-	i = pud_index(address);
 	pud = pud + i;
+
+	if (after_bootmem && pud_val(*pud)) {
+		phys_pmd_update(pud, address, end);
+		return;
+	}
+
 	for (; i < PTRS_PER_PUD; pud++, i++) {
 		int map; 
 		unsigned long paddr, pmd_phys;
 		pmd_t *pmd;
 
-		paddr = address + i*PUD_SIZE;
-		if (paddr >= end) { 
-			for (; i < PTRS_PER_PUD; i++, pud++) 
-				set_pud(pud, __pud(0)); 
+		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+		if (paddr >= end)
 			break;
-		} 
 
-		if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { 
+		if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
 			set_pud(pud, __pud(0)); 
 			continue;
 		} 
 
 		pmd = alloc_low_page(&map, &pmd_phys);
+		if (after_bootmem) spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
-		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-			unsigned long pe;
-
-			if (paddr >= end) { 
-				for (; j < PTRS_PER_PMD; j++, pmd++)
-					set_pmd(pmd,  __pmd(0)); 
-				break;
-		}
-			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
-			pe &= __supported_pte_mask;
-			set_pmd(pmd, __pmd(pe));
-		}
+		phys_pmd_init(pmd, paddr, end);
+		if (after_bootmem) spin_unlock(&init_mm.page_table_lock);
 		unmap_low_page(map);
 	}
 	__flush_tlb();
@@ -267,12 +315,16 @@ static void __init find_early_table_spac
 
 	table_start >>= PAGE_SHIFT;
 	table_end = table_start;
+
+	early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
+	       table_start<<PAGE_SHIFT,
+	       table_end<<PAGE_SHIFT);
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
    physical memory. To access them they are temporarily mapped. */
-void __init init_memory_mapping(unsigned long start, unsigned long end)
+void __devinit init_memory_mapping(unsigned long start, unsigned long end)
 { 
 	unsigned long next; 
 
@@ -284,7 +336,8 @@ void __init init_memory_mapping(unsigned
 	 * mapped.  Unfortunately this is done currently before the nodes are 
 	 * discovered.
 	 */
-	find_early_table_space(end);
+	if (!after_bootmem)
+		find_early_table_space(end);
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -292,20 +345,26 @@ void __init init_memory_mapping(unsigned
 	for (; start < end; start = next) {
 		int map;
 		unsigned long pud_phys; 
-		pud_t *pud = alloc_low_page(&map, &pud_phys);
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		if (after_bootmem)
+			pud = pud_offset_k(pgd, __PAGE_OFFSET);
+		else
+			pud = alloc_low_page(&map, &pud_phys);
+
 		next = start + PGDIR_SIZE;
 		if (next > end) 
 			next = end; 
 		phys_pud_init(pud, __pa(start), __pa(next));
-		set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+		if (!after_bootmem)
+			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
 		unmap_low_page(map);   
 	} 
 
-	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+	if (!after_bootmem)
+		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
 	__flush_tlb_all();
-	early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, 
-	       table_start<<PAGE_SHIFT, 
-	       table_end<<PAGE_SHIFT);
 }
 
 extern struct x8664_pda cpu_pda[NR_CPUS];
@@ -395,6 +454,76 @@ static inline int page_is_ram (unsigned 
 	return 0;
 }
 
+/*
+ * Memory hotplug specific functions
+ */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	set_page_count(page, 1);
+	__free_page(page);
+	totalram_pages++;
+	num_physpages++;
+}
+
+int add_memory(u64 start, u64 size, unsigned long attr)
+{
+	struct pglist_data *pgdat = &contig_page_data;
+	struct zone *zone = pgdat->node_zones + MAX_NR_ZONES - 2;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __add_pages(zone, start_pfn, nr_pages, attr);
+	if (ret)
+		goto error;
+
+	init_memory_mapping(start, (start + size - 1));
+
+	return ret;
+error:
+	printk("%s: Problem encountered in __add_pages!\n", __func__);
+	return ret;
+}
+EXPORT_SYMBOL(add_memory);
+
+int remove_memory(u64 start, u64 size, unsigned long attr)
+{
+	struct zone *zone;
+	unsigned long start_pfn, end_pfn, nr_pages;
+
+	printk("%s: start: 0x%llx size: 0x%llx attr: 0x%lx\n",
+		__func__, start, size, attr);
+
+	start_pfn = start >> PAGE_SHIFT;
+	nr_pages = size >> PAGE_SHIFT;
+	/* end_pfn is the last *valid* pfn */
+	end_pfn = start_pfn + nr_pages - 1;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+
+	printk("%s: memory will be removed from the %s zone\n",
+		__func__, zone->name);
+	printk("%s: start_pfn: 0x%lx nr_pages: 0x%lx end_pfn: 0x%lx\n",
+		__func__, start_pfn, nr_pages, end_pfn);
+
+	if (zone != page_zone(pfn_to_page(end_pfn)))
+		goto overlap;
+
+	printk("%s: just before remove pages\n", __func__);
+
+	return __remove_pages(zone, start_pfn, nr_pages, attr);
+overlap:
+	printk("%s: memory range overlaps multiple zones?\n", __func__);
+	return -ENOSYS;
+}
+EXPORT_SYMBOL(remove_memory);
+
+#endif
+
 extern int swiotlb_force;
 
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
diff -puN include/asm-x86_64/mman.h~L4-x86-64-memory-hotplug-no-sparse include/asm-x86_64/mman.h
_
