

With this patch, any page on the LRU lists can be migrated.
Page-cache pages, swap-cache pages and anonymous pages
are all handled in the same way.

Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 memhotplug1-dave/arch/i386/Kconfig        |    5 
 memhotplug1-dave/include/linux/mmigrate.h |   11 
 memhotplug1-dave/mm/Makefile              |    1 
 memhotplug1-dave/mm/mmigrate.c            |  370 ++++++++++++++++++++++++++++++
 4 files changed, 387 insertions(+)

diff -puN arch/i386/Kconfig~P07-memory_migration arch/i386/Kconfig
--- memhotplug1/arch/i386/Kconfig~P07-memory_migration	2004-11-04 16:46:01.000000000 -0800
+++ memhotplug1-dave/arch/i386/Kconfig	2004-11-04 16:46:01.000000000 -0800
@@ -780,6 +780,11 @@ config ARCH_HAS_MEM_MAP
 config MEMORY_HOTPLUG
 	bool "Allow for memory hotplug"
 	depends on NONLINEAR && HOTPLUG
+	select MEMORY_MIGRATE
+
+config MEMORY_MIGRATE
+	bool "Memory migration"
+	default y if MEMORY_HOTPLUG
 
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
diff -puN /dev/null include/linux/mmigrate.h
--- /dev/null	2004-08-06 10:20:23.000000000 -0700
+++ memhotplug1-dave/include/linux/mmigrate.h	2004-11-04 16:46:01.000000000 -0800
@@ -0,0 +1,11 @@
+#ifndef _LINUX_MMIGRATE_H
+#define _LINUX_MMIGRATE_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+
+/* Page migration entry points, implemented in mm/mmigrate.c. */
+extern struct page * migrate_onepage(struct page *);
+extern int try_to_migrate_pages(struct list_head *);
+
+#endif /* _LINUX_MMIGRATE_H */
diff -puN mm/Makefile~P07-memory_migration mm/Makefile
--- memhotplug1/mm/Makefile~P07-memory_migration	2004-11-04 16:46:01.000000000 -0800
+++ memhotplug1-dave/mm/Makefile	2004-11-04 16:46:01.000000000 -0800
@@ -19,4 +19,5 @@ obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_NONLINEAR)	+= nonlinear.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
+obj-$(CONFIG_MEMORY_MIGRATE) += mmigrate.o
 
diff -puN /dev/null mm/mmigrate.c
--- /dev/null	2004-08-06 10:20:23.000000000 -0700
+++ memhotplug1-dave/mm/mmigrate.c	2004-11-04 16:46:01.000000000 -0800
@@ -0,0 +1,370 @@
+/*
+ *  linux/mm/mmigrate.c
+ *
+ *  Support of memory hotplug
+ *
+ *  Authors:	IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ *		Hirokazu Takahashi <taka@valinux.co.jp>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/buffer_head.h>
+#include <linux/mm_inline.h>
+#include <linux/rmap.h>
+#include <linux/mmigrate.h>
+#include <linux/delay.h>
+
+/*
+ * The concept of memory migration is to replace a target page with
+ * a substitute page on a radix tree. New requests to access the target
+ * - including system calls and page faults - are redirected to the
+ * substitute that is locked and not up-to-date, so that all of these
+ * requests are blocked until the migration is done. Data of the target
+ * is copied into the substitute and then the requests are unblocked
+ * after all operations against the target have finished.
+ *
+ * By this approach, regular pages in the swapcache/pagecache and
+ * hugetlbpages can be handled in the same way.
+ */
+
+
+/*
+ * Try to writeback a dirty page to free its buffers.
+ * The caller must hold the page lock.  Returns 0 if the page has no
+ * private data or try_to_release_page() succeeded, 1 if pageout()
+ * returned PAGE_SUCCESS (lock_page() is then called again), and -1 if
+ * pageout() returned PAGE_ACTIVATE or the release failed.
+ */
+static int
+writeback_and_free_buffers(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+
+	BUG_ON(!PageLocked(page));
+	wait_on_page_writeback(page);
+	if (!PagePrivate(page))
+		return 0;
+	if (PageDirty(page)) {
+		switch (pageout(page, mapping)) {
+		case PAGE_SUCCESS:
+			lock_page(page);
+			return 1;
+		case PAGE_ACTIVATE:
+			return -1;
+		case PAGE_KEEP:
+		case PAGE_CLEAN:
+			break;
+		}
+	}
+	return try_to_release_page(page, GFP_KERNEL) ? 0 : -1;
+}
+
+/*
+ * Replace "page" with "newpage" on the radix tree, which the page belongs to.
+ * Returns 0, or -ENOENT with newpage detached again if nothing was replaced.
+ */
+static int
+replace_pages(struct page *page, struct page *newpage)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct page *delpage;
+
+	page_cache_get(newpage);
+	write_lock_irq(&mapping->tree_lock);
+	newpage->index = page->index;
+	if  (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		newpage->private = page->private;
+	} else
+		newpage->mapping = page->mapping;
+	if (PageWriteback(page))
+		SetPageWriteback(newpage);
+	delpage = radix_tree_replace(&mapping->page_tree, page_index(page), newpage);
+	write_unlock_irq(&mapping->tree_lock);
+	if (delpage != NULL)
+		return 0;
+	/* Truncation is in progress: detach newpage so it can be freed. */
+	if (PageSwapCache(newpage))
+		newpage->private = 0;
+	ClearPageSwapCache(newpage);
+	newpage->mapping = NULL;
+	ClearPageWriteback(newpage);
+	page_cache_release(newpage);
+	return -ENOENT;
+}
+
+/*
+ * Check whether the page can be migrated or not: 0 means yes, -ENOENT yes
+ * but truncated, -EBUSY that unmapping failed, -EAGAIN not at the moment.
+ */
+static inline int
+page_migratable(struct page *page, struct page *newpage,
+			int freeable_page_count)
+{
+	int truncated;
+
+	if (page_mapped(page)) {
+		int rc = try_to_unmap(page);
+		if (rc == SWAP_FAIL)
+			return -EBUSY;
+		if (rc == SWAP_AGAIN)
+			return -EAGAIN;
+	}
+	if (PageWriteback(page))
+		return -EAGAIN;
+	/* The page might have been truncated */
+	truncated = !PageSwapCache(newpage) && page_mapping(page) == NULL;
+	if (page_count(page) + truncated > freeable_page_count)
+		return -EAGAIN;
+	return truncated ? -ENOENT : 0;
+}
+
+/*
+ * Wait for the completion of all operations, which are going on against
+ * the page, and copy it.  Returns 0 or -ENOENT after the copy, otherwise
+ * -EBUSY.  The page lock is dropped while sleeping between retries.
+ */
+int
+migrate_page_common(struct page *page, struct page *newpage)
+{
+	long remaining = 5000;	/* XXXX */
+	int ret;
+
+	while (remaining > 0) {
+		BUG_ON(page_count(page) == 0);
+		ret = page_migratable(page, newpage, 2);
+		if (ret == -EBUSY)
+			return ret;
+		if (ret == 0 || ret == -ENOENT) {
+			copy_highpage(newpage, page);
+			return ret;
+		}
+		if (ret != -EAGAIN)
+			continue;
+		/* -EAGAIN: try to free the buffers, then sleep and check again. */
+		writeback_and_free_buffers(page);
+		unlock_page(page);
+		msleep(10);
+		remaining -= 10;
+		lock_page(page);
+	}
+	return -EBUSY;
+}
+
+/*
+ * Roll back a page migration.  Not implemented yet, so this just BUG()s.
+ */
+static int
+rewind_page(struct page *page, struct page *newpage)
+{
+	printk(KERN_ERR "Roll back migration is not implemented yet.\n");
+	BUG();
+	return 1;
+}
+
+/*
+ * Try to migrate one page.  Returns 0 on success, non-zero on failure.
+ *   - Lock for the page must be held when invoked (unlocked on success).
+ *   - The page must be attached to an address_space.
+ */
+static int
+generic_migrate_page(struct page *page, struct page *newpage)
+{
+	int ret;
+
+	/*
+	 * Make sure that the newpage is locked and kept not up-to-date
+	 * during the page migration, so that it's guaranteed that all
+	 * accesses to the newpage will be blocked until everything has
+	 * become ok.
+	 */
+	if (TestSetPageLocked(newpage))
+		BUG();
+
+	if ((ret = replace_pages(page, newpage)))
+		goto out_removing;
+
+	/*
+	 * With cleared PTEs, any accesses via the PTEs to the page
+	 * can be caught and blocked in a pagefault handler.
+	 */
+	if (page_mapped(page)) {
+		while ((ret = try_to_unmap(page)) == SWAP_AGAIN)
+			msleep(1);
+		if (ret != SWAP_SUCCESS) {
+			ret = -EBUSY;
+			goto out_busy;
+		}
+	}
+
+	wait_on_page_writeback(page);
+	if (PageSwapCache(page)) {
+		/*
+		 * The page is not mapped from anywhere now.
+		 * Detach it from the swapcache completely.
+		 */
+		ClearPageSwapCache(page);
+		page->private = 0;
+		page->mapping = NULL;
+	}
+
+	/* Wait for all operations against the page to finish. */
+	ret = migrate_page_common(page, newpage);
+	switch (ret) {
+	default:
+		/* The page is busy. Try it later. */
+		goto out_busy;
+	case -ENOENT:
+		/* The file the page belongs to has been truncated. */
+		page_cache_get(page);
+		page_cache_release(newpage);
+		newpage->mapping = NULL;
+		/* fall thru */
+	case 0:
+		break;	/* data copied; go on to transfer the page state */
+	}
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageActive(page)) {
+		SetPageActive(newpage);
+		ClearPageActive(page);
+	}
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+	}
+	/*
+	 * Finally, the newpage has become ready! Wake up all waiters,
+	 * which have been waiting for the completion of the migration.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+	unlock_page(newpage);
+
+	page->mapping = NULL;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return 0;
+
+out_busy:
+	/* Roll back all operations. */
+	rewind_page(page, newpage);
+	return ret;
+
+out_removing:
+	unlock_page(page);
+	unlock_page(newpage);
+	return ret;
+}
+
+/*
+ * migrate_onepage() can migrate regular pages assigned to pagecache,
+ * swapcache or anonymous memory.  Returns the page that replaced "page",
+ * or ERR_PTR() of -ENOSPC (add_to_swap() failed), -ENOENT (the page has
+ * no mapping), -ENOMEM (no replacement page could be allocated) or of
+ * the error reported by generic_migrate_page().
+ */
+struct page *
+migrate_onepage(struct page *page)
+{
+	struct address_space *mapping;
+	struct page *newpage;
+	int ret;
+
+	lock_page(page);
+#ifdef CONFIG_SWAP
+	/* Put the page in a radix tree if it isn't in the tree yet. */
+	if (PageAnon(page) && !PageSwapCache(page) &&
+	    !add_to_swap(page, GFP_KERNEL)) {
+		unlock_page(page);
+		return ERR_PTR(-ENOSPC);
+	}
+#endif /* CONFIG_SWAP */
+
+	mapping = page_mapping(page);
+	if (mapping == NULL) {
+		/* truncation is in progress */
+		if (PagePrivate(page))
+			try_to_release_page(page, GFP_KERNEL);
+		unlock_page(page);
+		return ERR_PTR(-ENOENT);
+	}
+
+	/* Allocate a new page with the same gfp_mask as the target page has. */
+	newpage = page_cache_alloc(mapping);
+	if (newpage == NULL) {
+		unlock_page(page);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = generic_migrate_page(page, newpage);
+	if (ret != 0) {
+		BUG_ON(page_count(newpage) != 1);
+		page_cache_release(newpage);
+		return ERR_PTR(ret);
+	}
+	BUG_ON(page_count(page) != 1);
+	page_cache_release(page);
+	return newpage;
+}
+
+/*
+ * This is the main entry point to migrate pages in a specific region.
+ * If a page is inactive, the page may be just released instead of
+ * migration.  Pages that could not be migrated are left on page_list;
+ * returns their number, or -ENOSPC if any of them failed with -ENOSPC.
+ */
+int try_to_migrate_pages(struct list_head *page_list)
+{
+	struct page *page, *page2, *newpage;
+	LIST_HEAD(rest_list);
+	unsigned long saved_kswapd = current->flags & PF_KSWAPD;
+	int nr_busy = 0, nr_noswap = 0;
+
+	current->flags |= PF_KSWAPD;    /*  It's fake */
+	list_for_each_entry_safe(page, page2, page_list, lru) {
+		list_del(&page->lru);
+		if (IS_ERR(newpage = migrate_onepage(page))) {
+			if (page_count(page) == 1) {
+				/* the page is already unused */
+				putback_page_to_lru(page_zone(page), page);
+				page_cache_release(page);
+			} else {
+				/* truncation may be in progress now. */
+				nr_busy++;
+				if (PTR_ERR(newpage) == -ENOSPC)
+					nr_noswap++;
+				list_add(&page->lru, &rest_list);
+			}
+		} else {
+			putback_page_to_lru(page_zone(newpage), newpage);
+			page_cache_release(newpage);
+		}
+	}
+	list_splice(&rest_list, page_list);
+	current->flags = (current->flags & ~PF_KSWAPD) | saved_kswapd; /* restore */
+	if (!nr_noswap)
+		return nr_busy;
+	if (printk_ratelimit())
+		printk(KERN_WARNING "memory migration failed: Any swap devices should be added.\n");
+	return -ENOSPC;
+}
+
_
