
From: Matt Tolentino <matthew.e.tolentino@intel.com>

This adds generic memory add/remove and supporting functions
for memory hotplug in a new file, as well as a memory hotplug
kernel config option.  These functions let architectures that
support memory hotplug decide which zone an operation affects.

Additionally, this patch includes a first stab at the sysfs representation
of memory ranges, tied to the rest of CONFIG_NONLINEAR and memory
hotplug.  The idea here is to allow memory hotplug operations to be
initiated not only by hardware/firmware events (I'm specifically
thinking of ACPI here), but also by userspace.  Note that this provides
only the initial framework for the sysfs support and is not yet complete.

Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/arch/i386/Kconfig                 |    4 
 memhotplug-dave/arch/i386/mm/init.c               |  117 ++++++++
 memhotplug-dave/drivers/base/Makefile             |    3 
 memhotplug-dave/drivers/base/init.c               |    3 
 memhotplug-dave/drivers/base/memory.c             |  300 ++++++++++++++++++++++
 memhotplug-dave/include/asm-i386/memory_hotplug.h |    8 
 memhotplug-dave/include/linux/memory.h            |   72 +++++
 memhotplug-dave/include/linux/memory_hotplug.h    |   21 +
 memhotplug-dave/mm/Makefile                       |    1 
 memhotplug-dave/mm/memory_hotplug.c               |  160 +++++++++++
 memhotplug-dave/mm/nonlinear.c                    |    5 
 mm/page_alloc.c                                   |    0 
 12 files changed, 693 insertions(+), 1 deletion(-)

diff -puN arch/i386/Kconfig~L-sysfs-memory-class arch/i386/Kconfig
--- memhotplug/arch/i386/Kconfig~L-sysfs-memory-class	2004-12-10 13:52:54.000000000 -0800
+++ memhotplug-dave/arch/i386/Kconfig	2004-12-10 13:52:55.000000000 -0800
@@ -780,6 +780,10 @@ config ARCH_HAS_BOOTPA
 
 source "mm/Kconfig"
 
+config MEMORY_HOTPLUG
+	bool "Allow for memory hotplug"
+	depends on NONLINEAR
+
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
 	depends on HIGHMEM4G || HIGHMEM64G
diff -puN arch/i386/mm/init.c~L-sysfs-memory-class arch/i386/mm/init.c
--- memhotplug/arch/i386/mm/init.c~L-sysfs-memory-class	2004-12-10 13:52:54.000000000 -0800
+++ memhotplug-dave/arch/i386/mm/init.c	2004-12-10 13:52:55.000000000 -0800
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -293,6 +294,122 @@ void __init one_highpage_init(struct pag
 		SetPageReserved(page);
 }
 
+int add_one_highpage(struct page *page, int pfn, int bad_ppro)
+{
+	/*
+	 * page_is_ram() is deliberately not consulted: it only covers
+	 * ram known at boot-time, and we learned about this ram later
+	 */
+	if (bad_ppro && page_kills_ppro(pfn)) {
+		SetPageReserved(page);
+		BUG(); /* for debugging.  remove later */
+	} else {
+		set_bit(PG_highmem, &page->flags);
+		set_page_count(page, 1);
+		__free_page(page);
+		totalhigh_pages++;
+	}
+	totalram_pages++;
+#ifndef CONFIG_DISCONTIGMEM
+	max_mapnr++;
+#endif
+	num_physpages++;
+	return 0;
+}
+
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+
+void online_page(struct page *page)
+{
+	/*
+	 * The page is expected to still be marked reserved here (see
+	 * __online_memory_block()); clear that before it is freed.
+	 */
+	ClearPageReserved(page);
+	add_one_highpage(page, page_to_pfn(page), 0);
+	/* see note in mm/memory_hotplug.c
+	 * this needs to be fixed properly
+	 */
+	/* zone->present_pages++; */
+}
+
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NUMA
+int add_pages(u64 start, u64 size, unsigned long attr)
+{
+	/* hot-added memory always goes to the last zone of the only node */
+	struct zone *highmem = &contig_page_data.node_zones[MAX_NR_ZONES - 1];
+	unsigned long first_pfn = start >> PAGE_SHIFT;
+	unsigned long count = size >> PAGE_SHIFT;
+
+	return __add_pages(highmem, first_pfn, count, attr);
+}
+
+int remove_pages(u64 start, u64 size, unsigned long attr)
+{
+	struct pglist_data *pgdata = &contig_page_data;
+	struct zone *zone;
+	unsigned long start_pfn, end_pfn, nr_pages, zone_end;
+
+	start_pfn = start >> PAGE_SHIFT;
+	nr_pages = size >> PAGE_SHIFT;
+	end_pfn = start_pfn + nr_pages;
+
+	/* an empty range, or one whose end wraps, can't be in any zone */
+	if (end_pfn <= start_pfn)
+		return -1;
+
+	/*
+	 * check to see which zone the page range is in. If
+	 * not in a zone where we allow hotplug (i.e. highmem),
+	 * just fail it right now.  Leave the zone iteration,
+	 * because perhaps we will allow for normal range to be
+	 * removed at some point - like for hotswap?  not likely.
+	 * For simplicity, also fail it if the range overlaps
+	 * multiple zones.
+	 */
+	for_each_zone(zone) {
+		zone_end = zone->zone_start_pfn + zone->spanned_pages;
+
+		/* empty zone, or no pfn in common: try the next zone */
+		if (!zone->spanned_pages || end_pfn <= zone->zone_start_pfn ||
+		    start_pfn >= zone_end)
+			continue;
+		/* not handling ranges that overlap multiple zones yet */
+		if (start_pfn < zone->zone_start_pfn || end_pfn > zone_end)
+			goto overlap;
+		printk("%s: memory should be removed from "
+			"the %s zone\n", __func__, zone->name);
+		break;
+	}
+
+	/* make sure it is in highmem */
+	if (zone != &pgdata->node_zones[MAX_NR_ZONES-1]) {
+		printk("%s: range to be removed must be in highmem!\n",
+			__func__);
+		goto not_highmem;
+	}
+
+	return __remove_pages(zone, start_pfn, nr_pages, attr);
+
+overlap:
+	printk("%s: memory range to be removed overlaps "
+		"multiple zones!!!\n", __func__);
+not_highmem:
+	return -1;
+}
+#endif
+
 #ifndef CONFIG_DISCONTIGMEM
 void __init set_highmem_pages_init(int bad_ppro) 
 {
diff -puN drivers/base/Makefile~L-sysfs-memory-class drivers/base/Makefile
--- memhotplug/drivers/base/Makefile~L-sysfs-memory-class	2004-12-10 13:52:54.000000000 -0800
+++ memhotplug-dave/drivers/base/Makefile	2004-12-10 13:52:55.000000000 -0800
@@ -2,7 +2,8 @@
 
 obj-y			:= core.o sys.o interface.o bus.o \
 			   driver.o class.o class_simple.o platform.o \
-			   cpu.o firmware.o init.o map.o dmapool.o
+			   cpu.o firmware.o init.o map.o dmapool.o \
+			   memory.o
 obj-y			+= power/
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
diff -puN drivers/base/init.c~L-sysfs-memory-class drivers/base/init.c
--- memhotplug/drivers/base/init.c~L-sysfs-memory-class	2004-12-10 13:52:54.000000000 -0800
+++ memhotplug-dave/drivers/base/init.c	2004-12-10 13:52:55.000000000 -0800
@@ -9,6 +9,7 @@
 
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/memory.h>
 
 extern int devices_init(void);
 extern int buses_init(void);
@@ -17,6 +18,7 @@ extern int firmware_init(void);
 extern int platform_bus_init(void);
 extern int system_bus_init(void);
 extern int cpu_dev_init(void);
+extern int memory_dev_init(void);
 
 /**
  *	driver_init - initialize driver model.
@@ -39,4 +41,5 @@ void __init driver_init(void)
 	platform_bus_init();
 	system_bus_init();
 	cpu_dev_init();
+	memory_dev_init();
 }
diff -puN /dev/null drivers/base/memory.c
--- /dev/null	2004-11-08 15:18:04.000000000 -0800
+++ memhotplug-dave/drivers/base/memory.c	2004-12-10 13:52:55.000000000 -0800
@@ -0,0 +1,300 @@
+/*
+ * drivers/base/memory.c - basic Memory class support
+ */
+
+#include <linux/sysdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>	/* capable() */
+#include <linux/topology.h>
+#include <linux/device.h>
+#include <linux/memory.h>
+#include <linux/kobject.h>
+#include <asm/atomic.h>
+
+struct sysdev_class memory_sysdev_class = {
+	set_kset_name("memory"),
+};
+EXPORT_SYMBOL(memory_sysdev_class);
+
+/*
+ * With these ops structures, we can override actions for things
+ * like merging or splitting
+ */
+static int memory_hotplug_filter(struct kset *kset, struct kobject *kobj)
+{
+	/* placeholder: always returns 1, i.e. no event is filtered out yet */
+	printk("Hit %s\n", __func__);
+	return 1;
+}
+
+static char *memory_hotplug_name(struct kset *kset, struct kobject *kobj)
+{
+	printk("Hit %s\n", __func__);
+	return "-no_name_implemented";
+}
+
+static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+			int num_envp, char *buffer, int buffer_size)
+{
+	/* placeholder: envp and buffer are left untouched for now */
+	printk("Hit %s\n", __func__);
+
+	return 0;
+}
+
+static struct kset_hotplug_ops memory_hotplug_ops = {
+	.filter = memory_hotplug_filter,
+	.name	= memory_hotplug_name,
+	.hotplug	= memory_hotplug,
+};
+
+
+/*
+ * register_memory - Setup a sysfs device for a memory block
+ */
+int
+register_memory(struct memory_block *memory, unsigned int section,
+		struct node *root)
+{
+	int error;
+
+	memory->sysdev.cls = &memory_sysdev_class;
+	memory->sysdev.id = section;
+
+	error = sysdev_register(&memory->sysdev);
+	if (error || !root)
+		return error;
+	error = sysfs_create_link(&root->sysdev.kobj, &memory->sysdev.kobj,
+				  kobject_name(&memory->sysdev.kobj));
+	if (error)	/* don't fail with the device still registered */
+		sysdev_unregister(&memory->sysdev);
+	return error;
+}
+
+/*
+ * use this as the physical section index that this memsection
+ * uses.
+ */
+
+static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf)
+{
+	unsigned long index =
+		container_of(dev, struct memory_block, sysdev)->phys_index;
+	return sprintf(buf, "%08lx\n", index);
+}
+
+/*
+ * online, offline, going offline, etc.
+ */
+static ssize_t show_mem_state(struct sys_device *dev, char *buf)
+{
+	enum memory_state state =
+		container_of(dev, struct memory_block, sysdev)->state;
+	return sprintf(buf, "%d\n", state);
+}
+/*
+ * phys_device is a bad name for this.  What I really want
+ * is a way to differentiate between memory ranges that
+ * are part of physical devices that constitute
+ * a complete removable unit or fru.
+ * i.e. do these ranges belong to the same physical device,
+ * s.t. if I offline all of these sections I can then
+ * remove the physical device?
+ */
+static ssize_t show_phys_device(struct sys_device *dev, char *buf)
+{
+	int fru = container_of(dev, struct memory_block, sysdev)->phys_device;
+
+	return sprintf(buf, "%d\n", fru);
+}
+
+SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
+SYSDEV_ATTR(state, 0444, show_mem_state, NULL);
+SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
+
+#define mem_create_simple_file(mem, attr_name)	\
+	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
+
+/*
+ * Block size attribute stuff
+ */
+
+static ssize_t
+print_block_size(struct class *class, char *buf)
+{
+	return sprintf(buf, "%lx\n", (unsigned long)CONFIG_MEM_BLOCK_SIZE);
+}
+
+static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
+
+static int block_size_init(void)
+{
+	/* hand back the sysfs result instead of always claiming success */
+	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				 &class_attr_block_size_bytes.attr);
+}
+
+/*
+ * All the probe stuff here
+ */
+
+/* define this off in some header somewhere ... */
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+static ssize_t
+memory_probe_store(struct class *class, const char *buf, size_t count)
+{
+	/* Hmmm... what do we really want this to do? */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	printk("Hit %s!\n", __func__);
+
+	/*
+	 * TODO: make call into arch code to add the probed block; the
+	 * running block counter that needs must not go without locking.
+	 * Until then the write is accepted and ignored.
+	 */
+
+	return count;
+}
+/* store-only attribute (no show method), so owner-write is all it needs */
+static CLASS_ATTR(probe, 0200, NULL, memory_probe_store);
+
+static int memory_probe_init(void)
+{
+	/* hand back the sysfs result instead of always claiming success */
+	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				 &class_attr_probe.attr);
+}
+#else
+#define memory_probe_init(...) (1)
+#endif
+
+/*
+ * Note that phys_device is optional.  It is here to allow for
+ * differentiation between which *physical* devices each
+ * section belongs to...
+ */
+
+int add_memory_block(unsigned long node_id, unsigned int section,
+		unsigned int phys_index, unsigned long state,
+		int phys_device)
+{
+	struct memory_block *mem = kmalloc(sizeof(*mem), GFP_KERNEL);
+	int ret, ret_state, ret_dev;
+
+	printk("Hit %s\n", __func__);
+
+	if (!mem)
+		return -ENOMEM;
+
+	memset(mem, 0, sizeof(*mem));
+
+	mem->phys_index = phys_index;
+	mem->state = state;
+	mem->phys_device = phys_device;
+
+	/*
+	 * TODO: not yet sure how this can be optimally structured
+	 * to get the fru information from hw/fw specific drivers
+	 */
+	ret = register_memory(mem, section, NULL);
+	if (ret) {
+		kfree(mem);	/* never registered, so still ours to free */
+		return ret;
+	}
+
+	/* try every attribute, then report the first failure (if any) */
+	ret = mem_create_simple_file(mem, phys_index);
+	ret_state = mem_create_simple_file(mem, state);
+	ret_dev = mem_create_simple_file(mem, phys_device);
+	return ret ? ret : (ret_state ? ret_state : ret_dev);
+}
+
+#define update_memdevice(...) 	(1)
+
+static ssize_t
+online_store(struct class *class, const char *buf, size_t count)
+{
+	unsigned int section = simple_strtoul(buf, NULL, 10);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (section >= NR_SECTIONS ||	/* index comes from userspace */
+	    mem_section[section].phys_section == INVALID_SECTION) {
+		printk("%s: Ummm.. this section is not currently mapped!\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * Crude section based onlining; probably need random
+	 * address onlining...
+	 */
+	online_section(section);
+
+	/*
+	 * Now that the memory has been truly onlined, update the
+	 * appropriate entry in sysfs
+	 */
+	update_memdevice();
+
+	return count;
+}
+static CLASS_ATTR(online, 0200, NULL, online_store);
+
+static int online_init(void)
+{
+	/* hand back the sysfs result instead of always claiming success */
+	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				 &class_attr_online.attr);
+}
+
+/*
+ * need an interface for the VM to add new memory regions,
+ * but without onlining it.
+ */
+int register_new_memory(unsigned int section)
+{
+	unsigned int phys_index = INVALID_SECTION;
+
+	printk("Hit %s\n", __func__);
+	if (section < NR_SECTIONS)	/* never index past mem_section[] */
+		phys_index = mem_section[section].phys_section;
+	if (phys_index == INVALID_SECTION) {
+		printk("%s: phys_index 0x%x is BOGUS!\n", __func__, phys_index);
+		return -EINVAL;
+	}
+	/* need some node info here and some sort of callback .... */
+	return add_memory_block(0, section, phys_index, MEM_OFFLINE, 0);
+}
+
+/*
+ * Initialize the sysfs support for memory devices...
+ */
+int __init memory_dev_init(void)
+{
+	unsigned int i;
+	int ret;
+
+	memory_sysdev_class.kset.hotplug_ops = &memory_hotplug_ops;
+	ret = sysdev_class_register(&memory_sysdev_class);
+	if (ret)	/* nothing below can work without the class */
+		return ret;
+
+	/* Create entries for the sections found and initialized during boot */
+	for (i = 0; i < NR_SECTIONS; i++) {
+		if (mem_section[i].phys_section == INVALID_SECTION)
+			break;
+		add_memory_block(0, i, mem_section[i].phys_section, MEM_ONLINE, 0);
+	}
+
+	/* best effort: a failure below only costs one control file */
+	memory_probe_init();
+	block_size_init();
+	online_init();
+
+	return 0;
+}
diff -puN /dev/null include/asm-i386/memory_hotplug.h
--- /dev/null	2004-11-08 15:18:04.000000000 -0800
+++ memhotplug-dave/include/asm-i386/memory_hotplug.h	2004-12-10 13:52:55.000000000 -0800
@@ -0,0 +1,8 @@
+#ifndef __ASM_MEMORY_HOTPLUG_H
+#define __ASM_MEMORY_HOTPLUG_H
+
+/* VM interface that may be used by firmware interface */
+extern int add_pages(u64 start, u64 size, unsigned long attr);
+extern int remove_pages(u64 start, u64 size, unsigned long attr);
+
+#endif
diff -puN /dev/null include/linux/memory.h
--- /dev/null	2004-11-08 15:18:04.000000000 -0800
+++ memhotplug-dave/include/linux/memory.h	2004-12-10 13:52:55.000000000 -0800
@@ -0,0 +1,72 @@
+/*
+ * include/linux/memory.h - generic memory definition
+ *
+ * This is mainly for topological representation. We define the
+ * basic "struct memory_block" here, which can be embedded in per-arch
+ * definitions or NUMA information.
+ *
+ * Basic handling of the devices is done in drivers/base/memory.c
+ * and system devices are handled in drivers/base/sys.c.
+ *
+ * Memory blocks are exported via sysfs as a sysdev class named "memory"
+ * (see memory_sysdev_class in drivers/base/memory.c).
+ *
+ */
+#ifndef _LINUX_MEMORY_H_
+#define _LINUX_MEMORY_H_
+
+#include <linux/sysdev.h>
+#include <linux/node.h>
+#include <linux/compiler.h>
+#include <linux/nonlinear.h>
+#include <linux/memory_hotplug.h>
+
+#include <asm/semaphore.h>
+
+#ifndef CONFIG_NONLINEAR
+#define CONFIG_MEM_BLOCK_SIZE	(1<<27)
+#else /* tie this to nonlinear */
+#define CONFIG_MEM_BLOCK_SIZE	SECTION_SIZE
+#endif
+
+#define CONFIG_ARCH_MEMORY_PROBE 1
+
+/*
+ * Temporary shim until there is a control/ directory
+ */
+enum memory_state {
+	MEM_ONLINE,
+	MEM_OFFLINE,
+	MEM_GOING_OFFLINE,
+	MEM_INVALID,		/* huh? */
+	MEM_BROKEN		/* ouch */
+};
+
+struct memory_block {
+	unsigned long phys_index;
+	enum memory_state state; 	/* just filler for now */
+	int phys_device;		/* to which fru does this belong? */
+	void *hw;			/* optional pointer to fw/hw data */
+	int (*phys_callback)(struct memory_block *);
+	struct sys_device sysdev;
+};
+
+extern int register_memory(struct memory_block *, unsigned int, struct node *);
+extern int register_new_memory(unsigned int section);
+struct notifier_block;
+
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+extern struct sysdev_class memory_sysdev_class;
+extern struct semaphore memory_sem;
+
+#define lock_memory_hotplug()	down(&memory_sem)
+#define unlock_memory_hotplug()	up(&memory_sem)
+#define lock_memory_hotplug_interruptible() down_interruptible(&memory_sem)
+#define hot_memory_notifier(fn, pri) {				\
+	static struct notifier_block fn##_nb = { fn, pri };	\
+	register_memory_notifier(&fn##_nb);			\
+}
+
+#endif /* _LINUX_MEMORY_H_ */
diff -puN /dev/null include/linux/memory_hotplug.h
--- /dev/null	2004-11-08 15:18:04.000000000 -0800
+++ memhotplug-dave/include/linux/memory_hotplug.h	2004-12-10 13:52:55.000000000 -0800
@@ -0,0 +1,21 @@
+#ifndef __MEMORY_HOTPLUG_H
+#define __MEMORY_HOTPLUG_H
+
+#include <asm/memory_hotplug.h>
+
+extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
+extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
+extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+/* need some defines for these for archs that don't support it */
+extern void online_page(struct page *page);
+
+struct memory_block;
+extern int __online_memory_block(struct memory_block *);
+
+/* reasonably generic interface to expand the physical pages in a zone  */
+extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages, unsigned long attr);
+extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages, unsigned long attr);
+
+#endif
diff -puN mm/Makefile~L-sysfs-memory-class mm/Makefile
--- memhotplug/mm/Makefile~L-sysfs-memory-class	2004-12-10 13:52:55.000000000 -0800
+++ memhotplug-dave/mm/Makefile	2004-12-10 13:52:55.000000000 -0800
@@ -18,4 +18,5 @@ obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_NONLINEAR)	+= nonlinear.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 
diff -puN /dev/null mm/memory_hotplug.c
--- /dev/null	2004-11-08 15:18:04.000000000 -0800
+++ memhotplug-dave/mm/memory_hotplug.c	2004-12-10 13:52:55.000000000 -0800
@@ -0,0 +1,160 @@
+/*
+ *  linux/mm/memory_hotplug.c
+ *
+ *  Copyright (C)
+ */
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/interrupt.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/pagevec.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/cpu.h>
+#include <linux/nonlinear.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+
+#include <asm/tlbflush.h>
+
+static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+{
+	unsigned long bytes = nr_pages * sizeof(struct page);
+	struct page *backing = alloc_pages(GFP_KERNEL, get_order(bytes));
+	struct page *memmap;
+
+	if (backing == NULL) {
+		printk("Failed to allocate new memmap!\n");
+		return NULL;
+	}
+
+	/* hand the array back zeroed, addressed through pfn_to_kaddr() */
+	memmap = (struct page *)pfn_to_kaddr(page_to_pfn(backing));
+	memset(memmap, 0, bytes);
+	return memmap;
+}
+
+/*
+ * Reasonably generic function for adding memory.  It is
+ * expected that archs that support memory hotplug will
+ * call this function after deciding the zone to which to
+ * add the new pages.
+ */
+int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
+		unsigned long nr_pages, unsigned long attr)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	struct page *memmap;
+	unsigned long base_pfn, i;
+	int err;
+
+	base_pfn = zone->zone_start_pfn + zone->present_pages;
+
+	alloc_memsections(base_pfn, phys_start_pfn, nr_pages);
+
+	memmap = __kmalloc_section_memmap(nr_pages);
+	if (!memmap)
+		return -ENOMEM;
+
+	err = zone_grow_free_lists(zone, zone->spanned_pages + nr_pages);
+	if (err) {
+		/* the memmap came from alloc_pages(), not from the slab */
+		free_pages((unsigned long)memmap,
+			   get_order(sizeof(struct page) * nr_pages));
+		return -1;
+	}
+
+	alloc_memmap(memmap, base_pfn, nr_pages);
+	memmap_init_zone(nr_pages, 0, zone - pgdat->node_zones, phys_start_pfn);
+
+	/* Hmm, our use of zone->present_pages above either a) needs to change
+	 * or b) needs to mean that "pages that are present, BUT not necessarily
+	 * used right now (i.e. offline)."  For now, use this hack to mean b).
+	 */
+	zone->present_pages += nr_pages;
+
+	/*
+	 * Actually, we don't want to online the pages here at all.  We
+	 * will enable the new regions to be available via sysfs and thus
+	 * onlined from user space.  Walk the range one section's worth
+	 * of pages at a time, since i and nr_pages count pages.
+	 */
+	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION)
+		register_new_memory(pfn_to_section(base_pfn + i));
+
+	return 0;
+}
+
+
+
+static int
+online_pages(unsigned long lpfn, unsigned long nr_pages)
+{
+	unsigned long done;	/* same width as nr_pages, unlike an int */
+
+	printk("%s: onlining %lx pages starting from lpfn: 0x%lx\n",
+		__func__, nr_pages, lpfn);
+
+	for (done = 0; done < nr_pages; done++)
+		online_page(lpfn_to_page(lpfn + done));
+
+	/* need error checking */
+	return 0;
+}
+
+#ifdef CONFIG_NONLINEAR
+/* this can't stay here.  it needs to go into nonlinear.c or something */
+int
+__online_memory_block(struct memory_block *mem)
+{
+	int i;
+	unsigned long section;
+
+	/*
+	 * this eventually needs to be a loop so that a memory_block
+	 * can contain more than a single section
+	 */
+	section = mem->phys_index; /* NOTE(review): or pfn_to_section()? */
+	if (section >= NR_SECTIONS ||	/* stay inside mem_section[] */
+	    mem_section[section].phys_section == INVALID_SECTION)
+		return -EINVAL;
+
+	for (i = 0; i < PAGES_PER_SECTION; i++) {
+		struct page *page = &mem_section[section].mem_map[i];
+		if (!PageReserved(page)) {	/* refuse unless all reserved */
+			printk("%s: Hmm, interesting. \n", __func__);
+			return -EBUSY;
+		}
+	}
+
+	return online_pages(page_to_lpfn(mem_section[section].mem_map),
+		PAGES_PER_SECTION);
+}
+#else
+int
+__online_memory_block(struct memory_block *mem)
+{
+	printk("%s() called without CONFIG_NONLINEAR being enabled\n", __func__);
+	return -ENOSYS;
+}
+#endif
+
+
+
+int __remove_pages(struct zone *zone, unsigned long start_pfn,
+		unsigned long nr_pages, unsigned long attr)
+{
+	unsigned int order;	/* get_order() takes bytes, not pages */
+
+	for (order = 0; order < 8 * sizeof(nr_pages); order++)
+		if (nr_pages == (1UL << order))
+			return capture_page_range(start_pfn, order);
+	return -EINVAL;	/* for now, only handle 2^x sized areas */
+}
+
diff -puN mm/nonlinear.c~L-sysfs-memory-class mm/nonlinear.c
--- memhotplug/mm/nonlinear.c~L-sysfs-memory-class	2004-12-10 13:52:55.000000000 -0800
+++ memhotplug-dave/mm/nonlinear.c	2004-12-10 13:52:55.000000000 -0800
@@ -24,9 +24,12 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 
 struct mem_section mem_section[NR_SECTIONS];
+EXPORT_SYMBOL(mem_section);
 unsigned short phys_section[NR_PHYS_SECTIONS];
+EXPORT_SYMBOL(phys_section);
 
 void set_page_section(struct page *page, unsigned int section_nr)
 {
@@ -164,5 +167,7 @@ int invalidate_phys_mapping(unsigned lon
 	}
 	return 0;
 }
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
 
 
diff -puN mm/page_alloc.c~L-sysfs-memory-class mm/page_alloc.c
_
