
Just a note: the entire M and N series of patches will be combined eventually.
They are kept separate for now only for the convenience of the tree maintainer.

The following patch reworks a lot of code.  It mostly makes it easier to
support multiple operations on the memory blocks without duplicating
too much code.  

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug-dave/arch/i386/mm/init.c            |   46 ++----
 memhotplug-dave/drivers/base/memory.c          |  174 ++++++++++++++++++++++---
 memhotplug-dave/include/linux/memory.h         |   21 +--
 memhotplug-dave/include/linux/memory_hotplug.h |    8 -
 memhotplug-dave/mm/memory_hotplug.c            |   42 ------
 5 files changed, 188 insertions(+), 103 deletions(-)

diff -puN arch/i386/mm/init.c~N1-sysfs-memory-class-remove arch/i386/mm/init.c
--- memhotplug/arch/i386/mm/init.c~N1-sysfs-memory-class-remove	2004-12-10 13:52:58.000000000 -0800
+++ memhotplug-dave/arch/i386/mm/init.c	2004-12-10 13:52:58.000000000 -0800
@@ -337,7 +337,7 @@ void online_page(struct page *page)
  * memory to the highmem for now.
  */
 #ifndef CONFIG_NUMA
-int add_pages(u64 start, u64 size, unsigned long attr)
+int add_memory(u64 start, u64 size, unsigned long attr)
 {
 	struct pglist_data *pgdata = &contig_page_data;
 	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
@@ -347,9 +347,8 @@ int add_pages(u64 start, u64 size, unsig
 	return __add_pages(zone, start_pfn, nr_pages, attr);
 }
 
-int remove_pages(u64 start, u64 size, unsigned long attr)
+int remove_memory(u64 start, u64 size, unsigned long attr)
 {
-	struct pglist_data *pgdata = &contig_page_data;
 	struct zone *zone;
 	unsigned long start_pfn, end_pfn, nr_pages;
 
@@ -360,34 +359,23 @@ int remove_pages(u64 start, u64 size, un
 	/*
 	 * check to see which zone the page range is in. If
 	 * not in a zone where we allow hotplug (i.e. highmem),
-	 * just fail it right now.  Leave the zone iteration,
-	 * because perhaps we will allow for normal range to be
-	 * removed at some point - like for hotswap?  not likely.
-	 * For simplicity, also fail it if the range overlaps
-	 * multiple zones.
+	 * just fail it right now.
 	 */
-	for_each_zone(zone) {
-		if ((start_pfn >= zone->zone_start_pfn) &&
-			(nr_pages < zone->spanned_pages)) {
-			printk("%s: memory should be removed from "
-				"the %s zone\n", __func__, zone->name);
-			break;
-		}
-		/* not handling removing memory ranges that overlap
-		 * multiple zones yet
-		 */
-		if ((start_pfn < zone->zone_start_pfn) &&
-			(end_pfn > zone->zone_start_pfn))
-			goto overlap;
-		if ((start_pfn >= zone->zone_start_pfn) &&
-			(nr_pages > zone->spanned_pages))
-			goto overlap;
-		/* there must be other cases...these are just the obvious */
-	}
+	zone = page_zone(pfn_to_page(start_pfn));
+
+	printk("%s(): memory will be removed from "
+			"the %s zone\n", __func__, zone->name);
+
+	/*
+	 * not handling removing memory ranges that
+	 * overlap multiple zones yet
+	 */
+	if (zone != page_zone(pfn_to_page(end_pfn-1)))
+		goto overlap;
 
 	/* make sure it is in highmem */
-	if (zone != &pgdata->node_zones[MAX_NR_ZONES-1]) {
-		printk("%s: range to be removed must be in highmem!\n",
+	if (!is_highmem(zone)) {
+		printk("%s(): range to be removed must be in highmem!\n",
 			__func__);
 		goto not_highmem;
 	}
@@ -395,7 +383,7 @@ int remove_pages(u64 start, u64 size, un
 	return __remove_pages(zone, start_pfn, nr_pages, attr);
 
 overlap:
-	printk("%s: memory range to be removed overlaps "
+	printk("%s(): memory range to be removed overlaps "
 		"multiple zones!!!\n", __func__);
 not_highmem:
 	return -1;
diff -puN drivers/base/memory.c~N1-sysfs-memory-class-remove drivers/base/memory.c
--- memhotplug/drivers/base/memory.c~N1-sysfs-memory-class-remove	2004-12-10 13:52:58.000000000 -0800
+++ memhotplug-dave/drivers/base/memory.c	2004-12-10 13:52:58.000000000 -0800
@@ -10,6 +10,8 @@
 #include <linux/device.h>
 #include <linux/memory.h>
 #include <linux/kobject.h>
+#include <linux/memory_hotplug.h>
+#include <linux/mm.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
@@ -128,20 +130,92 @@ static ssize_t show_mem_state(struct sys
 	return len;
 }
 
-static int online_memory_block(struct memory_block *mem)
+#ifdef CONFIG_NONLINEAR
+/*
+ * Perform 'action' (MEM_ONLINE or MEM_OFFLINE) on the section behind 'mem'.
+ * Returns 0 on success.  This can't stay here; it belongs in nonlinear.c.
+ */
+static int memory_block_action(struct memory_block *mem, unsigned long action)
+{
+	int i, ret;
+	unsigned long psection, start_pfn;
+	u64 start_paddr;	/* remove_memory() takes a u64 physical address */
+	struct page *first_page;
+	unsigned long old_state = mem->state;
+
+	/*
+	 * this eventually needs to be a loop so that a memory_block
+	 * can contain more than a single section
+	 */
+	psection = mem->phys_index; //pfn_to_section()??
+	first_page = pfn_to_page(section_to_pfn(psection));
+	printk(KERN_DEBUG "%s()\n"
+	       KERN_DEBUG "\tpsection: %ld\n"
+	       KERN_DEBUG "\tfirst_page: %p\n"
+	       KERN_DEBUG "\tphys_index: %08lx\n",
+		__func__, psection, first_page, mem->phys_index);
+	printk(KERN_DEBUG "\tphys_section[%2ld]: %d\n", psection, phys_section[psection]);
+	for (i = 0; i < PAGES_PER_SECTION; i++) {
+		if ((action == MEM_ONLINE) && !PageReserved(first_page + i)) {
+			printk(KERN_WARNING "%s: section number %ld page number %d "
+				"not reserved, was it already online? \n",
+				__func__, psection, i);
+			return -EBUSY;
+		}
+	}
+
+	switch (action) {
+		case MEM_ONLINE:
+			start_pfn = page_to_pfn(first_page);
+			ret = online_pages(start_pfn, PAGES_PER_SECTION);
+			break;
+		case MEM_OFFLINE:
+			mem->state = MEM_GOING_OFFLINE;
+			start_paddr = (u64)page_to_pfn(first_page) << PAGE_SHIFT;
+			ret = remove_memory(start_paddr, SECTION_SIZE, 0);
+			printk(KERN_DEBUG "%s(%p, %ld) remove_memory() res: %d\n",
+					__func__, mem, action, ret);
+			if (ret)
+				mem->state = old_state;
+			break;
+		default:
+			printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", __func__,
+				mem, action, action);
+			ret = -EINVAL;
+	}
+
+	return ret;
+}
+#else
+static int memory_block_action(struct memory_block *mem, unsigned long action)
+{
+	printk(KERN_WARNING "%s() failed to perform action: %ld, NONLINEAR is off\n", __func__, action);
+	return -ENOSYS;
+}
+#endif
+
+/*
+ * These to_state and from_state things really are just state
+ * machine changes.  It might just be better to declare them
+ * all in a table instead of in code like this.
+ */
+static int memory_block_change_state(struct memory_block *mem,
+		unsigned long to_state, unsigned long from_state_req)
 {
 	int ret = 0;
 	down(&mem->state_sem);
 
-	if (mem->state != MEM_OFFLINE)
-		return -EINVAL;
+	if (mem->state != from_state_req) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	ret = __online_memory_block(mem);
+	ret = memory_block_action(mem, to_state);
 	if (!ret)
-		mem->state = MEM_ONLINE;
+		mem->state = to_state;
 
+out:
 	up(&mem->state_sem);
-
 	return ret;
 }
 
@@ -152,6 +226,7 @@ store_mem_state(struct sys_device *dev, 
 		container_of(dev, struct memory_block, sysdev);
 	unsigned int phys_section_nr = mem->phys_index;
 	unsigned int section = phys_section[phys_section_nr];
+	int ret = -EINVAL;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -159,16 +234,17 @@ store_mem_state(struct sys_device *dev, 
 	if (mem_section[section].phys_section == INVALID_SECTION) {
 		printk("%s: Ummm.. this section (%d) is not currently mapped!\n",
 			__func__, section);
-		return -EINVAL;
+		goto out;
 	}
 
-	if (!strncmp(buf, "online", min((int)count,6))) {
-		online_memory_block(mem);
-	} else if(!strncmp(buf, "offline", min((int)count,7)))
-		return -ENOSYS;
-	else
-		return -EINVAL;
+	if (!strncmp(buf, "online", min((int)count, 6)))
+		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
+	else if(!strncmp(buf, "offline", min((int)count, 7)))
+		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
 
+out:
+	if (ret)
+		return ret;
 	return count;
 }
 
@@ -237,7 +313,7 @@ memory_probe_store(struct class *class, 
 	//if (!ram_present(phys_addr, PAGES_PER_SECTION))
 	//	return -EINVAL;
 
-	ret = add_pages(phys_addr, (PAGES_PER_SECTION << PAGE_SHIFT), 0);
+	ret = add_memory(phys_addr, (PAGES_PER_SECTION << PAGE_SHIFT), 0);
 
 	if (ret)
 		count = ret;
@@ -333,6 +409,61 @@ static int online_init(void)
 }
 
 /*
+ * For now, we have a linear search to go find the appropriate
+ * memory_block corresponding to a particular phys_index. If
+ * this gets to be a real problem, we can always use a radix
+ * tree or something here.
+ *
+ * This could be made generic for all sysdev classes.
+ */
+struct memory_block *find_memory_block(unsigned long section)
+{
+	struct kobject *kobj;
+	struct sys_device *sysdev;
+	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+
+	/*
+	 * This only works because we know that section == sysdev->id;
+	 * slightly redundant with sysdev_register().  NULL if not found.
+	 */
+	snprintf(name, sizeof(name), "%s%lu", MEMORY_CLASS_NAME, section);
+
+	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
+	if (!kobj)
+		return NULL;
+
+	sysdev = to_sysdev(kobj);
+	return container_of(sysdev, struct memory_block, sysdev);
+}
+
+/* Remove section's memory_block files and unregister it; -EINVAL if none. */
+int remove_memory_block(unsigned long node_id, unsigned int section,
+		unsigned int phys_index, unsigned long state, int phys_device)
+{
+	struct memory_block *mem;
+
+	printk("Hit %s\n", __func__);
+
+	mem = find_memory_block(section);
+	if (!mem)
+		return -EINVAL;
+#if 0
+	/* not yet sure how this can be optimally structured
+	 * to get the fru information from hw/fw specific drivers
+	 */
+	if (mem->callback)
+		callback(mem);
+#endif
+
+	mem_remove_simple_file(mem, phys_index);
+	mem_remove_simple_file(mem, state);
+	mem_remove_simple_file(mem, phys_device);
+	unregister_memory(mem, section, NULL);
+
+	return 0;
+}
+
+/*
  * need an interface for the VM to add new memory regions,
  * but without onlining it.
  */
@@ -349,6 +480,21 @@ int register_new_memory(unsigned int sec
 	return add_memory_block(0, section, phys_index, MEM_OFFLINE, 0);
 }
 
+/* Remove section's memory_block; -EINVAL if it has no valid phys_section. */
+int unregister_memory_section(unsigned int section)
+{
+	unsigned int phys_index = mem_section[section].phys_section;
+
+	printk("Hit %s\n", __func__);
+	if (phys_index == INVALID_SECTION) {
+		printk("%s: phys_index 0x%x is BOGUS!\n", __func__, phys_index);
+		return -EINVAL;
+	}
+
+	/* need some node info here and some sort of callback .... */
+	return remove_memory_block(0, section, phys_index, 0, 0);
+}
+
 /*
  * Initialize the sysfs support for memory devices...
  */
diff -puN include/linux/memory.h~N1-sysfs-memory-class-remove include/linux/memory.h
--- memhotplug/include/linux/memory.h~N1-sysfs-memory-class-remove	2004-12-10 13:52:58.000000000 -0800
+++ memhotplug-dave/include/linux/memory.h	2004-12-10 13:52:58.000000000 -0800
@@ -8,7 +8,7 @@
  * Basic handling of the devices is done in drivers/base/memory.c
  * and system devices are handled in drivers/base/sys.c.
  *
- * Memory block are exported via driverfs in the class/memory/devices/
+ * Memory blocks are exported via sysfs in the class/memory/devices/
  * directory.
  *
  */
@@ -18,8 +18,6 @@
 #include <linux/sysdev.h>
 #include <linux/node.h>
 #include <linux/compiler.h>
-#include <linux/nonlinear.h>
-#include <linux/memory_hotplug.h>
 
 #include <asm/semaphore.h>
 
@@ -40,20 +38,15 @@ extern int memory_dev_init(void);
 
 #define CONFIG_ARCH_MEMORY_PROBE 1
 
-/*
- * Temporary shim until there is a control/ directory
- */
-enum memory_state {
-	MEM_ONLINE,
-	MEM_OFFLINE,
-	MEM_GOING_OFFLINE,
-	MEM_INVALID,		/* huh? */
-	MEM_BROKEN		/* ouch */
-};
+#define	MEM_ONLINE		(1<<0)
+#define	MEM_OFFLINE		(1<<1)
+#define	MEM_GOING_OFFLINE	(1<<2)
+#define MEM_INVALID		(1<<3)
+#define MEM_BROKEN		(1<<4)
 
 struct memory_block {
 	unsigned long phys_index;
-	enum memory_state state; 	/* just filler for now */
+	unsigned long state;
 	struct semaphore state_sem;
 	int phys_device;		/* to which fru does this belong? */
 	void *hw;			/* optional pointer to fw/hw data */
diff -puN include/linux/memory_hotplug.h~N1-sysfs-memory-class-remove include/linux/memory_hotplug.h
--- memhotplug/include/linux/memory_hotplug.h~N1-sysfs-memory-class-remove	2004-12-10 13:52:58.000000000 -0800
+++ memhotplug-dave/include/linux/memory_hotplug.h	2004-12-10 13:52:58.000000000 -0800
@@ -7,11 +7,9 @@ extern int add_one_highpage(struct page 
 /* need some defines for these for archs that don't support it */
 extern void online_page(struct page *page);
 /* VM interface that may be used by firmware interface */
-extern int add_pages(u64 start, u64 size, unsigned long attr);
-extern int remove_pages(u64 start, u64 size, unsigned long attr);
-
-struct memory_block;
-extern int __online_memory_block(struct memory_block *);
+extern int add_memory(u64 start, u64 size, unsigned long attr);
+extern int remove_memory(u64 start, u64 size, unsigned long attr);
+extern int online_pages(unsigned long, unsigned long);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /* reasonably generic interface to expand the physical pages in a zone  */
diff -puN mm/memory_hotplug.c~N1-sysfs-memory-class-remove mm/memory_hotplug.c
--- memhotplug/mm/memory_hotplug.c~N1-sysfs-memory-class-remove	2004-12-10 13:52:58.000000000 -0800
+++ memhotplug-dave/mm/memory_hotplug.c	2004-12-10 13:52:58.000000000 -0800
@@ -121,8 +121,7 @@ int __add_pages(struct zone *zone, unsig
 	return err;
 }
 
-static int
-online_pages(unsigned long pfn, unsigned long nr_pages)
+int online_pages(unsigned long pfn, unsigned long nr_pages)
 {
 	int i;
 
@@ -141,45 +140,6 @@ online_pages(unsigned long pfn, unsigned
 	return 0;
 }
 
-#ifdef CONFIG_NONLINEAR
-/* this can't stay here.  it needs to go into nonlinear.c or something */
-int
-__online_memory_block(struct memory_block *mem)
-{
-	int i;
-	unsigned long section;
-
-	/*
-	 * this eventually needs to be a loop so that a memory_block
-	 * can contain more than a single section
-	 */
-	section = mem->phys_index; //pfn_to_section()??
-	//len = mem->phys_length;
-	if (mem_section[section].phys_section == INVALID_SECTION)
-		return -EINVAL;
-
-	for (i = 0; i < PAGES_PER_SECTION; i++) {
-		struct page *page = &mem_section[section].mem_map[i];
-		if (!PageReserved(page)) {
-			printk("%s: Hmm, interesting. \n", __func__);
-			return -EBUSY;
-		}
-	}
-
-	return online_pages(page_to_pfn(mem_section[section].mem_map),
-		PAGES_PER_SECTION);
-}
-#else
-int
-__online_memory_block(struct memory_block *mem)
-{
-	printk("%s() called without CONFIG_NONLINEAR being enabled\n", __func__);
-	return -ENOSYS;
-}
-#endif
-
-
-
 int __remove_pages(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, unsigned long attr)
 {
_
