diff -Naur linux-2.6.28/include/linux/swap.h linux-2.6.28-tmem/include/linux/swap.h
--- linux-2.6.28/include/linux/swap.h	2008-12-24 16:26:37.000000000 -0700
+++ linux-2.6.28-tmem/include/linux/swap.h	2009-01-05 16:57:43.000000000 -0700
@@ -149,9 +149,33 @@
 	unsigned int pages;
 	unsigned int max;
 	unsigned int inuse_pages;
+	unsigned long * preswap_map;
+	unsigned int preswap_pages;
 	int next;			/* next entry on swap list */
 };
 
+#ifdef CONFIG_PRESWAP
+/* in preswap.c */
+extern void preswap_shrink(unsigned long); /* REMOVE ME AFTER REMOVE HACK */
+extern int preswap_test(struct swap_info_struct *, unsigned long);
+extern void preswap_init(unsigned);
+extern int preswap_put(struct page *);
+extern int preswap_get(struct page *);
+extern void preswap_flush(unsigned, unsigned long);
+extern void preswap_flush_area(unsigned);
+/* in swapfile.c */
+extern int try_to_unuse(unsigned int, unsigned int, unsigned long);
+#define preswap_malloc vmalloc
+#else
+#define preswap_test(_x,_y) (0)
+#define preswap_init(_x) do { } while(0)
+#define preswap_put(_x) (0)
+#define preswap_get(_x) (0)
+#define preswap_flush(_x,_y) do { } while(0)
+#define preswap_flush_area(_x) do { } while(0)
+#define preswap_malloc(_x) (0)
+#endif
+
 struct swap_list_t {
 	int head;	/* head of priority-ordered swapfile list */
 	int next;	/* swapfile to be used next */
diff -Naur linux-2.6.28/mm/swapfile.c linux-2.6.28-tmem/mm/swapfile.c
--- linux-2.6.28/mm/swapfile.c	2008-12-24 16:26:37.000000000 -0700
+++ linux-2.6.28-tmem/mm/swapfile.c	2009-01-05 17:06:46.000000000 -0700
@@ -33,7 +33,7 @@
 #include
 #include
 
-static DEFINE_SPINLOCK(swap_lock);
+DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long total_swap_pages;
 static int swap_overflow;
@@ -44,7 +44,7 @@
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-static struct swap_list_t swap_list = {-1, -1};
+struct swap_list_t swap_list = {-1, -1};
 
 static struct swap_info_struct swap_info[MAX_SWAPFILES];
@@ -286,6 +286,7 @@
 				swap_list.next = p - swap_info;
 			nr_swap_pages++;
 			p->inuse_pages--;
+			preswap_flush(p - swap_info, offset);
 		}
 	}
 	return count;
@@ -696,7 +697,7 @@
  * Recycle to start on reaching the end, returning 0 when empty.
  */
 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
-					unsigned int prev)
+				unsigned int prev, unsigned int preswap)
 {
 	unsigned int max = si->max;
 	unsigned int i = prev;
@@ -722,6 +723,10 @@
 			prev = 0;
 			i = 1;
 		}
+		if (preswap) {
+			if (preswap_test(si,i)) break;
+			else continue;
+		}
 		count = si->swap_map[i];
 		if (count && count != SWAP_MAP_BAD)
 			break;
@@ -733,8 +738,12 @@
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it. All the necessary
 * page table adjustments can then be made atomically.
+ *
+ * if the boolean preswap is true, only unuse pages_to_unuse pages;
+ * pages_to_unuse==0 means all pages
 */
-static int try_to_unuse(unsigned int type)
+int try_to_unuse(unsigned int type, unsigned int preswap,
+		unsigned long pages_to_unuse)
 {
 	struct swap_info_struct * si = &swap_info[type];
 	struct mm_struct *start_mm;
@@ -770,7 +779,7 @@
 	 * one pass through swap_map is enough, but not necessarily:
 	 * there are races when an instance of an entry might be missed.
 	 */
-	while ((i = find_next_to_unuse(si, i)) != 0) {
+	while ((i = find_next_to_unuse(si, i, preswap)) != 0) {
 		if (signal_pending(current)) {
 			retval = -EINTR;
 			break;
@@ -947,6 +956,8 @@
 		 * interactive performance.
 		 */
 		cond_resched();
+		if (preswap && pages_to_unuse && !--pages_to_unuse)
+			break;
 	}
 
 	mmput(start_mm);
@@ -1291,7 +1302,7 @@
 	spin_unlock(&swap_lock);
 
 	current->flags |= PF_SWAPOFF;
-	err = try_to_unuse(type);
+	err = try_to_unuse(type,0,0);
 	current->flags &= ~PF_SWAPOFF;
 
 	if (err) {
@@ -1340,9 +1351,12 @@
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
 	p->flags = 0;
+	preswap_flush_area(p - swap_info);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	if (p->preswap_map)
+		vfree(p->preswap_map);
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
@@ -1657,6 +1671,11 @@
 		error = 0;
 		memset(swap_map, 0, maxpages * sizeof(short));
+
+		p->preswap_map = preswap_malloc(maxpages / sizeof(long));
+		if (p->preswap_map)
+			memset(p->preswap_map, 0, maxpages / sizeof(long));
+
 		for (i = 0; i < swap_header->info.nr_badpages; i++) {
 			int page_nr = swap_header->info.badpages[i];
 			if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
@@ -1719,6 +1738,7 @@
 	} else {
 		swap_info[prev].next = p - swap_info;
 	}
+	preswap_init(p - swap_info);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	error = 0;
diff -Naur linux-2.6.28/mm/page_io.c linux-2.6.28-tmem/mm/page_io.c
--- linux-2.6.28/mm/page_io.c	2008-12-24 16:26:37.000000000 -0700
+++ linux-2.6.28-tmem/mm/page_io.c	2009-01-05 17:11:38.000000000 -0700
@@ -102,6 +102,12 @@
 		unlock_page(page);
 		goto out;
 	}
+	if (preswap_put(page) == 1) {
+		set_page_writeback(page);
+		unlock_page(page);
+		end_page_writeback(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_NOIO, page_private(page), page,
 				end_swap_bio_write);
 	if (bio == NULL) {
@@ -127,6 +133,11 @@
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageUptodate(page));
+	if (preswap_get(page) == 1) {
+		SetPageUptodate(page);
+		unlock_page(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
 				end_swap_bio_read);
 	if (bio == NULL) {
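
[Note: the hooks above imply a simple contract for the CONFIG_PRESWAP backend.
preswap_put()/preswap_get() return 1 when the page was stored to or fetched from
the preswap backend, so swap_writepage()/swap_readpage() skip the bio path
entirely, and the per-device preswap_map bitmap records which swap offsets the
backend currently holds, which is how preswap_test() and find_next_to_unuse()
locate them. The sketch below only illustrates that contract; it is not the
preswap.c shipped with this patch. The tmem_put_page()/tmem_get_page()/
tmem_flush_page() helpers are hypothetical stand-ins for the real backend
(e.g. a hypercall layer), and direct access to swap_info[] is assumed purely
for illustration, since the patch leaves that array static in swapfile.c.]

/*
 * Illustrative sketch only, NOT the preswap.c from this patch.
 * tmem_*_page() are hypothetical backend hooks; swap_info[] visibility
 * is assumed here for brevity.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/bitops.h>

extern struct swap_info_struct swap_info[];		/* assumed visible */
extern int tmem_put_page(unsigned, unsigned long, struct page *);	/* hypothetical */
extern int tmem_get_page(unsigned, unsigned long, struct page *);	/* hypothetical */
extern void tmem_flush_page(unsigned, unsigned long);			/* hypothetical */

int preswap_test(struct swap_info_struct *sis, unsigned long offset)
{
	return sis->preswap_map && test_bit(offset, sis->preswap_map);
}

/* Returns 1 if the page was captured by the backend, 0 to fall back to bio I/O. */
int preswap_put(struct page *page)
{
	swp_entry_t entry = { .val = page_private(page) };
	unsigned type = swp_type(entry);
	unsigned long offset = swp_offset(entry);
	struct swap_info_struct *sis = &swap_info[type];

	if (!sis->preswap_map || tmem_put_page(type, offset, page) != 0)
		return 0;
	if (!test_and_set_bit(offset, sis->preswap_map))
		sis->preswap_pages++;
	return 1;
}

/* Returns 1 if the page was filled from the backend, 0 to fall back to bio I/O. */
int preswap_get(struct page *page)
{
	swp_entry_t entry = { .val = page_private(page) };
	unsigned type = swp_type(entry);
	unsigned long offset = swp_offset(entry);

	if (!preswap_test(&swap_info[type], offset))
		return 0;
	return tmem_get_page(type, offset, page) == 0;
}

/* Drop a single entry, e.g. when swap_entry_free() releases the swap slot. */
void preswap_flush(unsigned type, unsigned long offset)
{
	struct swap_info_struct *sis = &swap_info[type];

	if (sis->preswap_map && test_and_clear_bit(offset, sis->preswap_map)) {
		sis->preswap_pages--;
		tmem_flush_page(type, offset);
	}
}

Keeping the bitmap inside swap_info_struct also lets the new preswap argument
to try_to_unuse() pull preswap-resident pages back in through the existing
unuse loop (presumably what preswap_shrink() relies on) instead of adding a
second page walker.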