Signed-off-by: Dave Hansen --- lxc-dave/kernel/cpuset.c | 25 ++++++++++++++++-- lxc-dave/mm/page_alloc.c | 4 +- lxc-dave/mm/rmap.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++ lxc-dave/mm/vmscan.c | 18 ++++++++++++- 4 files changed, 105 insertions(+), 6 deletions(-) diff -puN kernel/cpuset.c~modify-lru-walk kernel/cpuset.c --- lxc/kernel/cpuset.c~modify-lru-walk 2006-08-11 17:26:45.000000000 -0700 +++ lxc-dave/kernel/cpuset.c 2006-08-11 17:26:46.000000000 -0700 @@ -117,10 +117,18 @@ typedef enum { void cpuset_inc_nr_pages(struct cpuset *cs, int nr) { + if (!cs) + return; + //if (printk_ratelimit()) + // printk("%s(%p) nr_pages: %d\n", __func__, cs, cs->mems_nr_pages); cs->mems_nr_pages += nr; } void cpuset_dec_nr_pages(struct cpuset *cs, int nr) { + if (!cs) + return; + //if (printk_ratelimit()) + // printk("%s(%p) nr_pages: %d\n", __func__, cs, cs->mems_nr_pages); cs->mems_nr_pages += nr; } int cpuset_get_nr_pages(const struct cpuset *cs) @@ -129,7 +137,11 @@ int cpuset_get_nr_pages(const struct cpu } int cpuset_amount_over_memory_max(const struct cpuset *cs) { - int amount = cs->mems_max_pages - cs->mems_nr_pages; + int amount; + + if (!cs || cs->mems_max_pages < 0) + return 0; + amount = cs->mems_max_pages - cs->mems_nr_pages; if (amount < 0) amount = 0; return amount; @@ -197,6 +209,8 @@ static struct cpuset top_cpuset = { .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), .cpus_allowed = CPU_MASK_ALL, .mems_allowed = NODE_MASK_ALL, + .mems_nr_pages = 0, + .mems_max_pages = -1, .count = ATOMIC_INIT(0), .sibling = LIST_HEAD_INIT(top_cpuset.sibling), .children = LIST_HEAD_INIT(top_cpuset.children), @@ -953,6 +967,8 @@ static int update_nodemask(struct cpuset mutex_lock(&callback_mutex); cs->mems_allowed = trialcs.mems_allowed; cs->mems_generation = cpuset_mems_generation++; + //cs->mems_max_pages = -1; + //cs->mems_nr_pages = 0; mutex_unlock(&callback_mutex); set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ @@ -1834,7 +1850,7 @@ static struct cftype cft_mem_used = { .private = FILE_MEMORY_USED, }; -static struct cftype cft_mem_max_rate = { +static struct cftype cft_mem_max = { .name = "memory_max_pages", .private = FILE_MEMORY_MAX, }; @@ -1884,7 +1900,7 @@ static int cpuset_populate_dir(struct de return err; if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_mem_max_rate)) < 0) + if ((err = cpuset_add_file(cs_dentry, &cft_mem_max)) < 0) return err; if ((err = cpuset_add_file(cs_dentry, &cft_mem_used)) < 0) return err; @@ -1939,6 +1955,7 @@ static long cpuset_create(struct cpuset INIT_LIST_HEAD(&cs->children); cs->mems_generation = cpuset_mems_generation++; cs->mems_max_pages = parent->mems_max_pages; + cs->mems_nr_pages = 0; fmeter_init(&cs->fmeter); cs->parent = parent; @@ -2046,6 +2063,7 @@ int __init cpuset_init_early(void) tsk->cpuset = &top_cpuset; tsk->cpuset->mems_generation = cpuset_mems_generation++; tsk->cpuset->mems_max_pages = -1; + tsk->cpuset->mems_nr_pages = 0; if (tsk->mm) tsk->mm->cpuset = tsk->cpuset; return 0; @@ -2068,6 +2086,7 @@ int __init cpuset_init(void) fmeter_init(&top_cpuset.fmeter); top_cpuset.mems_generation = cpuset_mems_generation++; top_cpuset.mems_max_pages = -1; + top_cpuset.mems_nr_pages = 0; init_task.cpuset = &top_cpuset; diff -puN mm/page_alloc.c~modify-lru-walk mm/page_alloc.c --- lxc/mm/page_alloc.c~modify-lru-walk 2006-08-11 17:26:45.000000000 -0700 +++ lxc-dave/mm/page_alloc.c 2006-08-11 17:26:46.000000000 -0700 @@ -1154,7 +1154,7 @@ got_pg: if (page) set_page_owner(page, order, gfp_mask); #endif - cpuset_inc_nr_pages(current->cpuset, PAGE_SIZE<cpuset, 1<cpuset = current->cpuset; return page; } @@ -1203,7 +1203,7 @@ void __pagevec_free(struct pagevec *pvec fastcall void __free_pages(struct page *page, unsigned int order) { - cpuset_dec_nr_pages(page->cpuset, PAGE_SIZE<cpuset, 1<head, anon_vma_node) { + if (!cpuset_amount_over_memory_max(vma->vm_mm->cpuset)) + continue; + + ret = 1; + break; + } + spin_unlock(&anon_vma->lock); + return ret; +} + +static int file_page_has_naughty_cpuset(struct page *page) +{ + struct address_space *mapping = page->mapping; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + struct prio_tree_iter iter; + struct cpuset *ret = NULL; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + if (!cpuset_amount_over_memory_max(vma->vm_mm->cpuset)) + continue; + ret = vma->vm_mm->cpuset; + break; + } + + if (ret) + goto out;; + + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, + shared.vm_set.list) { + if (!cpuset_amount_over_memory_max(vma->vm_mm->cpuset)) + continue; + ret = vma->vm_mm->cpuset; + break; + } + +out: + spin_unlock(&mapping->i_mmap_lock); + return ret; +} + +int page_has_naughty_cpuset(struct page *page) +{ + //BUG_ON(!PageLocked(page)); + return cpuset_amount_over_memory_max(page->cpuset); + + //if (PageAnon(page)) + // return anon_page_has_naughty_cpuset(page); + //return file_page_has_naughty_cpuset(page); +} + diff -puN mm/vmscan.c~modify-lru-walk mm/vmscan.c --- lxc/mm/vmscan.c~modify-lru-walk 2006-08-11 17:26:18.000000000 -0700 +++ lxc-dave/mm/vmscan.c 2006-08-11 17:26:46.000000000 -0700 @@ -63,8 +63,8 @@ struct scan_control { int swap_cluster_max; int swappiness; - int all_unreclaimable; + int only_pages_with_naughty_cpusets; }; #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) @@ -793,9 +793,15 @@ force_reclaim_mapped: spin_unlock_irq(&zone->lru_lock); while (!list_empty(&l_hold)) { + extern int page_has_naughty_cpuset(struct page *page); cond_resched(); page = lru_to_page(&l_hold); list_del(&page->lru); + if (sc->only_pages_with_naughty_cpusets && + !page_has_naughty_cpuset(page)) { + list_add(&page->lru, &l_active); + continue; + } if (page_mapped(page)) { if (!reclaim_mapped || (total_swap_pages == 0 && PageAnon(page)) || @@ -875,6 +881,7 @@ static unsigned long shrink_zone(int pri unsigned long nr_inactive; unsigned long nr_to_scan; unsigned long nr_reclaimed = 0; + int nr_scans = 0; atomic_inc(&zone->reclaim_in_progress); @@ -897,6 +904,11 @@ static unsigned long shrink_zone(int pri nr_inactive = 0; while (nr_active || nr_inactive) { + nr_scans++; + if (printk_ratelimit()) + printk("%s() scan nr: %d\n", __func__, nr_scans); + if (nr_scans > 10) + sc->only_pages_with_naughty_cpusets = 0; if (nr_active) { nr_to_scan = min(nr_active, (unsigned long)sc->swap_cluster_max); @@ -993,6 +1005,7 @@ unsigned long try_to_free_pages(struct z .swap_cluster_max = SWAP_CLUSTER_MAX, .may_swap = 1, .swappiness = vm_swappiness, + .only_pages_with_naughty_cpusets = 1, }; delay_swap_prefetch(); @@ -1090,6 +1103,7 @@ static unsigned long balance_pgdat(pg_da .may_swap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = vm_swappiness, + .only_pages_with_naughty_cpusets = 1, }; loop_again: @@ -1376,6 +1390,7 @@ unsigned long shrink_all_memory(unsigned .swap_cluster_max = nr_pages, .may_writepage = 1, .swappiness = vm_swappiness, + .only_pages_with_naughty_cpusets = 1, }; delay_swap_prefetch(); @@ -1568,6 +1583,7 @@ static int __zone_reclaim(struct zone *z SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .swappiness = vm_swappiness, + .only_pages_with_naughty_cpusets = 1, }; disable_swap_token(); _