diff -Naur linux-2.6.28/mm/preswap.c linux-2.6.28-tmem/mm/preswap.c
--- linux-2.6.28/mm/preswap.c	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.28-tmem/mm/preswap.c	2009-01-05 18:07:07.000000000 -0700
@@ -0,0 +1,335 @@
+/*
+ * linux/mm/preswap.c
+ *
+ * Copyright (C) 2008 Dan Magenheimer, Oracle Corp.
+ */
+
+/*
+ * NOTE(review): the header names on the bare #include lines below were
+ * lost in this copy of the patch; restore them from the original posting
+ * before applying.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "tmem.h"
+
+extern spinlock_t swap_lock;
+extern struct swap_list_t swap_list;
+
+/*
+ * Id of the single tmem pool shared by all swap types.  The value is
+ * treated as "no pool" whenever it is negative when cast to int32_t.
+ */
+static uint32_t preswap_poolid = -1;
+
+/*
+ * Return nonzero if the bit for @offset is set in @sis->preswap_map,
+ * and 0 if it is clear or if @sis has no preswap_map.
+ */
+int preswap_test(struct swap_info_struct *sis, unsigned long offset)
+{
+	if (!sis->preswap_map)
+		return 0;
+	return test_bit(offset % BITS_PER_LONG,
+			&sis->preswap_map[offset / BITS_PER_LONG]);
+}
+
+/* Set the bit for @offset in @sis->preswap_map; no-op without a map. */
+static inline void preswap_set(struct swap_info_struct *sis,
+			       unsigned long offset)
+{
+	if (!sis->preswap_map)
+		return;
+	__set_bit(offset % BITS_PER_LONG,
+		  &sis->preswap_map[offset / BITS_PER_LONG]);
+}
+
+/* Clear the bit for @offset in @sis->preswap_map; no-op without a map. */
+static inline void preswap_clear(struct swap_info_struct *sis,
+				 unsigned long offset)
+{
+	if (!sis->preswap_map)
+		return;
+	__clear_bit(offset % BITS_PER_LONG,
+		    &sis->preswap_map[offset / BITS_PER_LONG]);
+}
+
+/*
+ * Offer @page to preswap.
+ *
+ * Returns 1 if the page was successfully put into preswap, 0 if the page
+ * was declined, and -ERRNO for a specific error.  0 is also returned
+ * without calling tmem when no pool exists or when the swap offset does
+ * not fit in 32 bits.
+ */
+int preswap_put(struct page *page)
+{
+	swp_entry_t entry = { .val = page_private(page), };
+	unsigned type = swp_type(entry);
+	pgoff_t offset = swp_offset(entry);
+	uint64_t ind64 = (uint64_t)offset;
+	uint32_t ind = (uint32_t)offset;
+	unsigned long mfn = pfn_to_mfn(page_to_pfn(page));
+	struct swap_info_struct *sis = get_swap_info_struct(type);
+	int dup = 0, ret;
+
+	if ((int32_t)preswap_poolid < 0)
+		return 0;
+	/* only a 32-bit index is passed on; decline offsets that do not fit */
+	if (ind64 != ind)
+		return 0;
+	/* note whether this offset is already marked as being in preswap */
+	if (preswap_test(sis, offset))
+		dup = 1;
+	mb();
+	ret = tmem_op(TMEM_PUT_PAGE, preswap_poolid, type, ind, mfn, 0, 0, 0);
+	if (ret == 1) {
+		preswap_set(sis, offset);
+		/* an offset that was already marked has already been counted */
+		if (!dup)
+			sis->preswap_pages++;
+	} else if (dup) {
+		/*
+		 * NOTE(review): the bit stays set after a failed duplicate
+		 * put, so a later preswap_get() will still ask tmem for this
+		 * offset -- confirm tmem keeps or drops the older copy here.
+		 */
+		printk("preswap_put: failed dup put on %d,%lx\n",
+			type, (long)ind);
+	}
+	return ret;
+}
+
+/*
+ * Try to fill @page from preswap.
+ *
+ * Returns 1 if the page was successfully gotten from preswap, 0 if the
+ * page was declined, and -ERRNO for a specific error.  0 is also
+ * returned without calling tmem when no pool exists, when the swap
+ * offset does not fit in 32 bits, or when the offset is not marked in
+ * the preswap map.
+ */
+int preswap_get(struct page *page)
+{
+	swp_entry_t entry = { .val = page_private(page), };
+	unsigned type = swp_type(entry);
+	pgoff_t offset = swp_offset(entry);
+	uint64_t ind64 = (uint64_t)offset;
+	uint32_t ind = (uint32_t)offset;
+	unsigned long mfn = pfn_to_mfn(page_to_pfn(page));
+	struct swap_info_struct *sis = get_swap_info_struct(type);
+
+	if ((int32_t)preswap_poolid < 0)
+		return 0;
+	if (ind64 != ind)
+		return 0;
+	if (!preswap_test(sis, offset))
+		return 0;
+	return tmem_op(TMEM_GET_PAGE, preswap_poolid, type, ind, mfn, 0, 0, 0);
+}
+
+/*
+ * Flush the page at (@type, @offset) from preswap if it is marked in the
+ * preswap map, then drop it from the map and from the page count.
+ */
+void preswap_flush(unsigned type, unsigned long offset)
+{
+	uint64_t ind64 = (uint64_t)offset;
+	uint32_t ind = (uint32_t)offset;
+	struct swap_info_struct *sis = get_swap_info_struct(type);
+
+	if ((int32_t)preswap_poolid < 0)
+		return;
+	if (ind64 != ind)
+		return;
+	if (preswap_test(sis, offset)) {
+		(void)tmem_op(TMEM_FLUSH_PAGE, preswap_poolid, type,
+				ind, 0, 0, 0, 0);
+		sis->preswap_pages--;
+		preswap_clear(sis, offset);
+	}
+}
+
+/*
+ * Issue a TMEM_FLUSH_OBJECT operation for swap type @type and reset
+ * that type's preswap page count.
+ *
+ * NOTE(review): the bits in preswap_map are not cleared here; presumably
+ * the caller discards or reinitializes the map -- confirm.
+ */
+void preswap_flush_area(unsigned type)
+{
+	struct swap_info_struct *sis = get_swap_info_struct(type);
+
+	if ((int32_t)preswap_poolid < 0)
+		return;
+	(void)tmem_op(TMEM_FLUSH_OBJECT, preswap_poolid, type, 0, 0, 0, 0, 0);
+	sis->preswap_pages = 0;
+}
+
+/*
+ * Try to reduce the number of preswap pages, summed over all swap types
+ * on swap_list, to at most @target_pages.
+ *
+ * Code structure leveraged from sys_swapoff.  Each pass picks one swap
+ * type and calls try_to_unuse() on it; at most four passes are made.
+ */
+void preswap_shrink(unsigned long target_pages)
+{
+	struct swap_info_struct *si = NULL;
+	unsigned long total_pages = 0, total_pages_to_unuse;
+	unsigned long pages = 0, unuse_pages = 0;
+	int type;
+	int wrapped = 0;
+
+	do {
+		/*
+		 * we don't want to hold swap_lock while doing a very
+		 * lengthy try_to_unuse, but swap_list may change
+		 * so restart scan from swap_list.head each time
+		 */
+		spin_lock(&swap_lock);
+		total_pages = 0;
+		for (type = swap_list.head; type >= 0; type = si->next) {
+			si = get_swap_info_struct(type);
+			total_pages += si->preswap_pages;
+		}
+		if (total_pages <= target_pages) {
+			spin_unlock(&swap_lock);
+			return;
+		}
+		total_pages_to_unuse = total_pages - target_pages;
+		/* pick the first swap type whose pages pass the memory check */
+		for (type = swap_list.head; type >= 0; type = si->next) {
+			si = get_swap_info_struct(type);
+			if (total_pages_to_unuse < si->preswap_pages) {
+				pages = unuse_pages = total_pages_to_unuse;
+			} else {
+				pages = si->preswap_pages;
+				unuse_pages = 0; /* unuse all */
+			}
+			if (security_vm_enough_memory(pages))
+				continue;
+			vm_unacct_memory(pages);
+			break;
+		}
+		spin_unlock(&swap_lock);
+		if (type < 0)
+			return;
+		current->flags |= PF_SWAPOFF;
+		(void)try_to_unuse(type, 1, unuse_pages);
+		current->flags &= ~PF_SWAPOFF;
+		wrapped++;
+	} while (wrapped <= 3);
+}
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *preswap_proc;
+
+/*
+ * read_proc handler for the "preswap" proc entry: reports the number of
+ * preswap pages summed over all swap types on swap_list, as a decimal
+ * number with no trailing newline.
+ */
+static int preswap_procread(char *page, char **start, off_t off,
+			    int count, int *eof, void *data)
+{
+	int len;
+	int type;
+	unsigned long totalpages = 0;
+	struct swap_info_struct *si = NULL;
+
+	spin_lock(&swap_lock);
+	for (type = swap_list.head; type >= 0; type = si->next) {
+		si = get_swap_info_struct(type);
+		totalpages += si->preswap_pages;
+	}
+	spin_unlock(&swap_lock);
+	len = sprintf(page, "%lu", totalpages);
+	*eof = 1;
+	return len;
+}
+
+/*
+ * write_proc handler for the "preswap" proc entry: parses the written
+ * text with memparse() and calls preswap_shrink() with the result.
+ *
+ * Returns @count on success, -EPERM without CAP_SYS_ADMIN, -EBADMSG if
+ * at most one byte was written, -EFBIG if the text does not fit in the
+ * local buffer, and -EFAULT if the user buffer cannot be copied.
+ */
+static int preswap_procwrite(struct file *file, const char __user *buffer,
+			     unsigned long count, void *data)
+{
+	char memstring[64], *endchar;
+	unsigned long target_pages;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (count <= 1)
+		return -EBADMSG;
+	/* leave room for the terminating NUL */
+	if (count >= sizeof(memstring))
+		return -EFBIG;
+
+	if (copy_from_user(memstring, buffer, count))
+		return -EFAULT;
+	/*
+	 * Terminate right after the copied bytes so memparse() never reads
+	 * uninitialized stack beyond what the user wrote.
+	 */
+	memstring[count] = '\0';
+	target_pages = memparse(memstring, &endchar);
+	preswap_shrink(target_pages);
+	return count;
+}
+
+/*
+ * Create the "preswap" proc entry and hook up its read/write handlers.
+ *
+ * NOTE(review): the entry is created with mode S_IRUGO although it has
+ * a write handler -- confirm whether S_IWUSR was intended.
+ */
+static void preswap_procinit(void)
+{
+	preswap_proc = create_proc_entry("preswap", S_IRUGO, NULL);
+	if (!preswap_proc) {
+		printk(KERN_WARNING "preswap: error creating proc entry\n");
+		return;
+	}
+	preswap_proc->read_proc = &preswap_procread;
+	preswap_proc->write_proc = &preswap_procwrite;
+}
+#else
+static inline void preswap_procinit(void)
+{
+}
+#endif
+
+/*
+ * Create the tmem pool used by preswap unless one already exists, and
+ * register the proc interface once the pool exists.  @type is unused.
+ */
+void preswap_init(unsigned type)
+{
+	/* only need one tmem pool for all swap types */
+	if ((int32_t)preswap_poolid >= 0)
+		return;
+	preswap_poolid = tmem_new_pool(0, 0, TMEM_POOL_PERSIST);
+	/* poolid is unsigned: test the sign through int32_t as elsewhere */
+	if ((int32_t)preswap_poolid < 0)
+		return;
+	preswap_procinit();
+}