/*
 * Annotated Linux 1.0 kernel source: linux/mm/swap.c
 * Date: 2009-03-18    Source: taozhijiangscu
 */
/********************************************
*Created By: 陶治江
*Date: 2009-3-16
********************************************/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>

#include <asm/system.h> /* for cli()/sti() */
#include <asm/bitops.h>

/* Size of swap_info[]: one slot per active swap file or swap device. */
#define MAX_SWAPFILES 8

/* Values for swap_info_struct.flags; SWP_WRITEOK contains the SWP_USED bit. */
#define SWP_USED	1
#define SWP_WRITEOK	3

/*
 * Swap entry encoding used by these macros: the swap area index ("type")
 * lives in bits 1..7, the page offset inside that area lives above
 * PAGE_SHIFT.
 */
#define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
#define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))

/* Number of swap_info[] slots that have ever been handed out. */
static int nr_swapfiles = 0;
/* Tasks sleeping until a per-page swap lock bit is released. */
static struct wait_queue * lock_queue = NULL;

static struct swap_info_struct {
	unsigned long flags;		/* 0, SWP_USED or SWP_WRITEOK */
	struct inode * swap_file;	/* inode when swapping to a regular file */
	unsigned int swap_device;	/* device number when swapping to a block device */
	unsigned char * swap_map;	/* per-page use count; 0 = free, 0x80 = unusable */
	unsigned char * swap_lockmap;	/* one lock bit per swap page */
	int pages;			/* number of usable pages found at swapon time */
	int lowest_bit;			/* lower bound of the free-slot search window */
	int highest_bit;		/* upper bound of the free-slot search window */
	unsigned long max;		/* highest usable page index + 1 */
} swap_info[MAX_SWAPFILES];
/*
 * MAX_SWAPFILES is 8 and every swap area needs its own swap_info_struct,
 * so at most 8 swap areas can be active at once.
 */

extern unsigned long free_page_list;
extern int shm_swap (int);

/*
 * The following are used to make sure we don't thrash too much...
 * NOTE!! NR_LAST_FREE_PAGES must be a power of 2...
 */
#define NR_LAST_FREE_PAGES 32
static unsigned long last_free_pages[NR_LAST_FREE_PAGES] = {0,};

/* What follows is the low-level routine behind the swap read/write calls. */
void rw_swap_page(int rw, unsigned long entry, char * buf)
{
unsigned long type, offset;
struct swap_info_struct * p; type = SWP_TYPE(entry);
if (type >= nr_swapfiles) {
printk("Internal error: bad swap-device\n");
return;
}
p = &swap_info[type];
offset = SWP_OFFSET(entry);
if (offset >= p->max) {
printk("rw_swap_page: weirdness\n");
return;
}
if (!(p->flags & SWP_USED)) {
printk("Trying to swap to unused swap-device\n");
return;
}
while (set_bit(offset,p->swap_lockmap)) //加锁
sleep_on(&lock_queue); //等待,又是一个睡眠队列
//内核状态的统计,读入
if (rw == READ)
kstat.pswpin++;
else //写出
kstat.pswpout++;
if (p->swap_device) { //有交换设备,就是设备交换了
ll_rw_page(rw,p->swap_device,offset,buf); //调用底层的读写函数
} else if (p->swap_file) { //否则就是文件交换
unsigned int zones[8];
unsigned int block;
int i, j; block = offset << (12 - p->swap_file->i_sb->s_blocksize_bits); //单位转换? for (i=0, j=0; j< PAGE_SIZE ; i++, j +=p->swap_file->i_sb->s_blocksize)
if (!(zones[i] = bmap(p->swap_file,block++))) //转换为底层的文件信息了吧
{
printk("rw_swap_page: bad swap file\n");
return;
}
ll_rw_swap_file(rw,p->swap_file->i_dev, zones, i,buf);
} else
printk("re_swap_page: no swap file or device\n");
if (offset && !clear_bit(offset,p->swap_lockmap)) //解锁
printk("rw_swap_page: lock already cleared\n");
wake_up(&lock_queue);
} //进行内存的交换,就是从交换设备中获得空闲的页面,返回的
//是构造产生的entry
/*
 * get_swap_page() - reserve a free page slot in some writable swap area.
 *
 * Scans every swap area whose flags equal SWP_WRITEOK, looking between
 * lowest_bit and highest_bit for a swap_map[] entry that is 0. The slot
 * found gets use count 1 and nr_swap_pages is decremented.
 *
 * Returns the SWP_ENTRY() value for the slot, or 0 when nothing is free.
 */
unsigned int get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned int offset, type;

	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++) {
		/* only areas that are fully set up may be allocated from */
		if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
			if (p->swap_map[offset])	/* non-zero: in use or unusable */
				continue;
			p->swap_map[offset] = 1;
			nr_swap_pages--;
			/* the top slot of the window was taken: shrink the window */
			if (offset == p->highest_bit)
				p->highest_bit--;
			/* everything below this slot was found busy: start here next time */
			p->lowest_bit = offset;
			return SWP_ENTRY(type,offset);
		}
	}
	return 0;
}

/*
 * swap_duplicate() - add one reference to an already allocated swap slot.
 *
 * entry: swap entry to duplicate; 0 is accepted and returns 0.
 *
 * Entries of type SHM_SWP_TYPE are returned untouched. For other entries the
 * swap_map[] use count is incremented (swap_map[] is a byte array precisely
 * because it holds a reference count, so one slot can be shared).
 *
 * Returns entry on success, 0 when the entry is invalid or the slot unused.
 *
 * NOTE(review): the count is a single byte and is incremented unchecked.
 */
unsigned long swap_duplicate(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return 0;
	offset = SWP_OFFSET(entry);
	type = SWP_TYPE(entry);
	if (type == SHM_SWP_TYPE)
		return entry;
	if (type >= nr_swapfiles) {
		printk("Trying to duplicate nonexistent swap-page\n");
		return 0;
	}
	p = type + swap_info;
	if (offset >= p->max) {
		/* message used to say "swap_free", pointing at the wrong function */
		printk("swap_duplicate: weirdness\n");
		return 0;
	}
	if (!p->swap_map[offset]) {	/* slot is not in use */
		printk("swap_duplicate: trying to duplicate unused page\n");
		return 0;
	}
	p->swap_map[offset]++;
	return entry;
}

/* Release one page on a swap device; entry is the constructed swap entry. */
/*
 * swap_free() - drop one reference to a swap slot.
 *
 * entry: swap entry to release; 0 and SHM_SWP_TYPE entries are ignored.
 *
 * While holding the slot's lock bit, the free-slot search window is widened
 * to include the slot and its swap_map[] count is decremented. When the
 * count reaches 0, nr_swap_pages is incremented.
 */
void swap_free(unsigned long entry)
{
	struct swap_info_struct *area;
	unsigned long slot, idx;

	if (!entry)
		return;

	idx = SWP_TYPE(entry);
	if (idx == SHM_SWP_TYPE)
		return;
	if (idx >= nr_swapfiles) {
		printk("Trying to free nonexistent swap-page\n");
		return;
	}

	area = swap_info + idx;
	slot = SWP_OFFSET(entry);
	if (slot >= area->max) {
		printk("swap_free: weirdness\n");
		return;
	}
	if (!(area->flags & SWP_USED)) {
		printk("Trying to free swap from unused swap-device\n");
		return;
	}

	/* wait for the per-page lock */
	while (set_bit(slot, area->swap_lockmap))
		sleep_on(&lock_queue);

	/* make sure the allocator's search window covers this slot again */
	if (slot < area->lowest_bit)
		area->lowest_bit = slot;
	if (slot > area->highest_bit)
		area->highest_bit = slot;

	/* a count of 0 here means the map and the caller disagree */
	if (area->swap_map[slot]) {
		if (--area->swap_map[slot] == 0)
			nr_swap_pages++;
	} else {
		printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
	}

	if (!clear_bit(slot, area->swap_lockmap))
		printk("swap_free: lock already cleared\n");
	wake_up(&lock_queue);
}

/*
 * Bring a page from the swap device back into physical memory.
 * table_ptr is presumably a pointer to the page-table entry.
 */
/*
 * swap_in() - replace the swap entry stored in *table_ptr by a page in memory.
 *
 * table_ptr: location holding a non-present entry that encodes a swap slot.
 *
 * SHM_SWP_TYPE entries are handed to shm_no_page(). Otherwise a fresh page
 * is obtained and filled through read_swap_page(). If *table_ptr still holds
 * the same entry afterwards, the new page is installed with PAGE_DIRTY and
 * PAGE_PRIVATE and the swap slot is released; if it changed meanwhile, the
 * new page is freed again.
 */
void swap_in(unsigned long *table_ptr)
{
	unsigned long swp = *table_ptr;
	unsigned long frame;

	if (swp & PAGE_PRESENT) {
		printk("trying to swap in present page\n");
		return;
	}
	if (!swp) {
		printk("No swap page in swap_in\n");
		return;
	}
	if (SWP_TYPE(swp) == SHM_SWP_TYPE) {
		shm_no_page ((unsigned long *) table_ptr);
		return;
	}

	/* get a page of physical memory to read into */
	frame = get_free_page(GFP_KERNEL);
	if (frame) {
		read_swap_page(swp, (char *) frame);
	} else {
		oom(current);
		frame = BAD_PAGE;
	}

	if (*table_ptr == swp) {
		*table_ptr = frame | (PAGE_DIRTY | PAGE_PRIVATE);
		swap_free(swp);		/* the swap slot is no longer needed */
		return;
	}
	/* the entry changed while we were busy: discard our copy */
	free_page(frame);
}

/* Move the contents of a page of memory out to the swap device. */
/*
 * try_to_swap_out() - try to evict the page mapped by one page-table entry.
 *
 * table_ptr: pointer to the page-table entry to examine.
 *
 * Returns 0 when nothing was evicted: page not present, beyond high_memory,
 * reserved, recently accessed (the accessed bit is cleared instead), listed
 * in last_free_pages[], dirty but shared, or no swap slot available.
 * Returns 1 when a dirty page was written to swap and its frame released.
 * For a clean page the mapping is simply dropped and the result is
 * 1 + the frame's remaining mem_map[] count.
 */
static inline int try_to_swap_out(unsigned long * table_ptr)
{
	unsigned long pte = *table_ptr;
	unsigned long frame;
	unsigned long swp;
	int k;

	if (!(pte & PAGE_PRESENT))
		return 0;
	if (pte >= high_memory)
		return 0;
	/* reserved memory is never swapped */
	if (mem_map[MAP_NR(pte)] & MAP_PAGE_RESERVED)
		return 0;
	if (pte & PAGE_ACCESSED) {
		/* recently used: clear the bit and give the page another round */
		*table_ptr &= ~PAGE_ACCESSED;
		return 0;
	}

	frame = pte & PAGE_MASK;

	/* do not evict pages that were handed out only recently */
	k = 0;
	while (k < NR_LAST_FREE_PAGES) {
		if (last_free_pages[k] == frame)
			return 0;
		k++;
	}

	if (pte & PAGE_DIRTY) {
		/* a dirty page has to be written out; shared frames are left alone */
		if (mem_map[MAP_NR(frame)] != 1)
			return 0;
		swp = get_swap_page();
		if (!swp)
			return 0;
		*table_ptr = swp;
		invalidate();
		write_swap_page(swp, (char *) frame);
		free_page(frame);	/* the physical page can go now */
		return 1;
	}

	/* clean page: the entry is zeroed and no swap slot is recorded */
	*table_ptr = 0;
	invalidate();
	free_page(frame);
	return 1 + mem_map[MAP_NR(frame)];
}

/*
 * sys_idle() does nothing much: it just searches for likely candidates for
 * swapping out or forgetting about. This speeds up the search when we
 * actually have to swap.
 */
asmlinkage int sys_idle(void)
{
	/* The body visible here only requests a reschedule and reports success. */
	need_resched = 1;
	return 0;
}

/*
 * A new implementation of swap_out(). We do not swap complete processes,
 * but only a small number of blocks, before we continue with the next
 * process. The number of blocks actually swapped is determined on the
 * number of page faults, that this process actually had in the last time,
 * so we won't swap heavily used processes all the time ...
 *
 * Note: the priority argument is a hint on much CPU to waste with the
 * swap block search, not a hint, of how much blocks to swap with
 * each process.
 *
 * (C) 1993 Kai Petzke, [email protected]
 */
#ifdef NEW_SWAP
/*
* These are the miminum and maximum number of pages to swap from one process,
* before proceeding to the next:
*/
#define SWAP_MIN 4
#define SWAP_MAX 32 /*
* The actual number of pages to swap is determined as:
* SWAP_RATIO / (number of recent major page faults)
*/
#define SWAP_RATIO 128 //这里的priority可能是用于产生下面交换出去更有效的
//算法的,从函数被调用的角度来看priority是不断变大的
//而priority的变化将直接影响到counter的计数
static int swap_out(unsigned int priority)
{
static int swap_task; //静态变量!!!
int table;
int page;
long pg_table;
int loop;
int counter = NR_TASKS * 2 >> priority;
struct task_struct *p; counter = NR_TASKS * 2 >> priority;
for(; counter >= 0; counter--, swap_task++) {
/*
* Check that swap_task is suitable for swapping. If not, look for
* the next suitable process.
*/
loop = 0;
while(1) {
if(swap_task >= NR_TASKS) {
swap_task = 1;
if(loop) //表示这个大循环已经走过一遍了!!
/* all processes are unswappable or already swapped out */
return 0;
loop = 1;
} p = task[swap_task];
if(p && p->swappable && p->rss) //rss内存页面,不是被保留的,说明可能被交换出去的
break; swap_task++;
} /*
* Determine the number of pages to swap from this process.
*/
if(! p -> swap_cnt) { //number of pages to swap on next pass
p->dec_flt = (p->dec_flt * 3) / 4 + p->maj_flt - p->old_maj_flt;
p->old_maj_flt = p->maj_flt; if(p->dec_flt >= SWAP_RATIO / SWAP_MIN) {
p->dec_flt = SWAP_RATIO / SWAP_MIN;
p->swap_cnt = SWAP_MIN;
} else if(p->dec_flt <= SWAP_RATIO / SWAP_MAX)
p->swap_cnt = SWAP_MAX;
else
p->swap_cnt = SWAP_RATIO / p->dec_flt;
} /*
* Go through process' page directory.
*/
for(table = p->swap_table; table < 1024; table++)
{
pg_table = ((unsigned long *) p->tss.cr3)[table]; //获得页目录项
if(pg_table >= high_memory)
continue;
if(mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
continue;
if(!(PAGE_PRESENT & pg_table)) { //无效的页目录项,why it is bad?can not be 0?
printk("swap_out: bad page-table at pg_dir[%d]: %08lx\n",
table, pg_table);
((unsigned long *) p->tss.cr3)[table] = 0;
continue;
}
pg_table &= 0xfffff000; /*
* Go through this page table.
*/
for(page = p->swap_page; page < 1024; page++)
{
switch(try_to_swap_out(page + (unsigned long *) pg_table)) //晕,原来页目录项这里转换!
{
case 0:
break;
case 1: //this indicate sccuess@!
p->rss--; //的确这里递减了
/* continue with the following page the next time */
p->swap_table = table;
p->swap_page = page + 1;
if((--p->swap_cnt) == 0)
swap_task++;
return 1;
default: //表示直接释放了内存,没有实际写入到交换设备中的
p->rss--;
break;
}
} p->swap_page = 0;
} /*
* Finish work with this process, if we reached the end of the page
* directory. Mark restart from the beginning the next time.
*/
p->swap_table = 0; //这么说p->swap_table不是一个很神秘的变量啊?
}
return 0;
} #else /* old swapping procedure */ /*
* Go through the page tables, searching for a user page that
* we can swap out.
*
* We now check that the process is swappable (normally only 'init'
* is un-swappable), allowing high-priority processes which cannot be
* swapped out (things like user-level device drivers (Not implemented)).
*/
static int swap_out(unsigned int priority)
{
static int swap_task = 1;
static int swap_table = 0;
static int swap_page = 0;
int counter = NR_TASKS*8;
int pg_table;
struct task_struct * p; counter >>= priority;
check_task:
if (counter-- < 0)
return 0;
if (swap_task >= NR_TASKS) {
swap_task = 1;
goto check_task;
}
p = task[swap_task];
if (!p || !p->swappable) {
swap_task++;
goto check_task;
}
check_dir:
if (swap_table >= PTRS_PER_PAGE) {
swap_table = 0;
swap_task++;
goto check_task;
}
pg_table = ((unsigned long *) p->tss.cr3)[swap_table];
if (pg_table >= high_memory || (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)) {
swap_table++;
goto check_dir;
}
if (!(PAGE_PRESENT & pg_table)) {
printk("bad page-table at pg_dir[%d]: %08x\n",
swap_table,pg_table);
((unsigned long *) p->tss.cr3)[swap_table] = 0;
swap_table++;
goto check_dir;
}
pg_table &= PAGE_MASK;
check_table:
if (swap_page >= PTRS_PER_PAGE) {
swap_page = 0;
swap_table++;
goto check_dir;
}
switch (try_to_swap_out(swap_page + (unsigned long *) pg_table)) {
case 0: break;
case 1: p->rss--; return 1;
default: p->rss--;
}
swap_page++;
goto check_table;
} #endif static int try_to_free_page(void)
{
int i=6; while (i--) {
if (shrink_buffers(i))
return 1;
if (shm_swap(i))
return 1;
if (swap_out(i))
return 1;
}
return 0;
} /*
* Note that this must be atomic, or bad things will happen when
* pages are requested in interrupts (as malloc can do). Thus the
* cli/sti's.
*/
/*
 * add_mem_queue() - push a page onto the front of a free-page list.
 *
 * addr:  address inside the page; it is rounded down with PAGE_MASK.
 * queue: list head. The old head is stored in the first word of the page,
 *        then the head is pointed at the page.
 */
static inline void add_mem_queue(unsigned long addr, unsigned long * queue)
{
	unsigned long *link = (unsigned long *) (addr & PAGE_MASK);

	*link = *queue;
	*queue = (unsigned long) link;	/* the page is the new list head */
}

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 * - select the "normal" case and put it inside the if () { XXX }
 * - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */
void free_page(unsigned long addr)
{
	unsigned short *count;
	unsigned long flag;

	/* addresses at or above high_memory are silently ignored */
	if (addr >= high_memory)
		return;

	count = mem_map + MAP_NR(addr);
	if (!*count) {
		/* a use count of 0 means the page is already free */
		printk("Trying to free free memory (%08lx): memory probabably corrupted\n",addr);
		printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
		return;
	}
	/* reserved pages are never released */
	if (*count & MAP_PAGE_RESERVED)
		return;

	save_flags(flag);
	cli();
	if (--*count == 0) {
		/* last user gone: refill the secondary list first, then the free list */
		if (nr_secondary_pages < MAX_SECONDARY_PAGES) {
			add_mem_queue(addr,&secondary_page_list);
			nr_secondary_pages++;
		} else {
			add_mem_queue(addr,&free_page_list);
			nr_free_pages++;
		}
	}
	restore_flags(flag);
}

/*
 * This is one ugly macro, but it simplifies checking, and makes
 * this speed-critical place reasonably fast, especially as we have
 * to do things with the interrupt flag etc.
 *
 * Note that this #define is heavily optimized to give fast code
 * for the normal case - the if-statements are ordered so that gcc-2.2.2
 * will make *no* jumps for the normal code. Don't touch unless you
 * know what you are doing.
 */
/*
 * REMOVE_FROM_MEM_QUEUE(queue, nr) - pop the first page of a free-page list.
 *
 * This is a macro, not a function: it expands inside the caller and uses the
 * caller's locals `result`, `flag` and `index`, and it executes
 * `return result;` on the caller's behalf when a page is found.
 *
 * queue: list head variable; the next link is read from the first word of
 *        the page being removed.
 * nr:    counter variable for that list, decremented on success.
 *
 * On success the page's mem_map[] count is set to 1 (in use) and the page is
 * recorded in last_free_pages[]. If the head is misaligned, beyond
 * high_memory, or its mem_map[] count is not 0, a message is printed and the
 * list and counter are reset to empty. An empty list with a non-zero counter
 * is reported and the counter cleared.
 *
 * The notes are kept up here because nothing may follow the backslash that
 * continues a macro line.
 */
#define REMOVE_FROM_MEM_QUEUE(queue,nr) \
	cli(); \
	if ((result = queue) != 0) { \
		if (!(result & ~PAGE_MASK) && result < high_memory) { \
			queue = *(unsigned long *) result; \
			if (!mem_map[MAP_NR(result)]) { \
				mem_map[MAP_NR(result)] = 1; \
				nr--; \
last_free_pages[index = (index + 1) & (NR_LAST_FREE_PAGES - 1)] = result; \
				restore_flags(flag); \
				return result; \
			} \
			printk("Free page %08lx has mem_map = %d\n", \
				result,mem_map[MAP_NR(result)]); \
		} else \
			printk("Result = 0x%08lx - memory map destroyed\n", result); \
		queue = 0; \
		nr = 0; \
	} else if (nr) { \
		printk(#nr " is %d, but " #queue " is empty\n",nr); \
		nr = 0; \
	} \
	restore_flags(flag)

/*
 * Get physical address of first (actually last :-) free page, and mark it
 * used. If no free pages left, return 0.
 *
 * Note that this is one of the most heavily called functions in the kernel,
 * so it's a bit timing-critical (especially as we have to disable interrupts
 * in it). See the above macro which does most of the work, and which is
 * optimized for a fast normal path of execution.
 */
/* Take a free page of physical memory and mark it as used. */
/*
 * priority: GFP_ATOMIC never tries to free memory, GFP_BUFFER gives up
 *           after the first attempt on free_page_list, any other value
 *           retries for as long as try_to_free_page() reports progress.
 *
 * Returns the page address (via the `return result;` hidden inside
 * REMOVE_FROM_MEM_QUEUE), or 0 when no page could be obtained.
 */
unsigned long __get_free_page(int priority)
{
	extern unsigned long intr_count;
	/* result, flag and index are used by REMOVE_FROM_MEM_QUEUE */
	unsigned long result, flag;
	static unsigned long index = 0;

	/* this routine can be called at interrupt time via
	malloc. We want to make sure that the critical
	sections of code have interrupts disabled. -RAB
	Is this code reentrant? */

	/* a non-atomic request while intr_count is set is reported and downgraded */
	if (intr_count && priority != GFP_ATOMIC) {
		printk("gfp called nonatomically from interrupt %08lx\n",
			((unsigned long *)&priority)[-1]);
		priority = GFP_ATOMIC;
	}
	save_flags(flag);
repeat:
	/* returns from this function if the free list yields a page */
	REMOVE_FROM_MEM_QUEUE(free_page_list,nr_free_pages);
	if (priority == GFP_BUFFER)
		return 0;
	if (priority != GFP_ATOMIC)
		if (try_to_free_page())
			goto repeat;
	/* last resort: the secondary list */
	REMOVE_FROM_MEM_QUEUE(secondary_page_list,nr_secondary_pages);
	return 0;
}

/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares?
 */
/*
 * Annotator's note: there are many gotos below. Without outside interference
 * plain loops might have been enough, but this is an operating system, so
 * restarting is presumably the safer choice.
 */
/*
 * type: index of the swap area whose pages should be brought back in.
 *
 * Walks the page directory (p->tss.cr3) and page tables of every task,
 * looking for non-present, non-zero entries whose SWP_TYPE() equals type.
 * Each such page is read into a freshly obtained page, which then replaces
 * the swap entry.
 *
 * Returns 0 when the scan completes, -ENOMEM when no page can be obtained.
 */
static int try_to_unuse(unsigned int type)
{
	int nr, pgt, pg;
	unsigned long page, *ppage;
	unsigned long tmp = 0;	/* spare page, obtained before it is needed */
	struct task_struct *p;

	nr = 0;
/*
 * When we have to sleep, we restart the whole algorithm from the same
 * task we stopped in. That at least rids us of all races.
 */
repeat:
	for (; nr < NR_TASKS ; nr++)
	{
		p = task[nr];
		if (!p)
			continue;
		/* page directory level */
		for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++)
		{
			ppage = pgt + ((unsigned long *) p->tss.cr3);
			page = *ppage;
			if (!page)
				continue;
			if (!(page & PAGE_PRESENT) || (page >= high_memory))
				continue;
			if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
				continue;
			ppage = (unsigned long *) (page & PAGE_MASK);
			/* page table level */
			for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++)
			{
				page = *ppage;
				if (!page)
					continue;
				if (page & PAGE_PRESENT)
					continue;
				if (SWP_TYPE(page) != type)
					continue;
				if (!tmp) {
					/*
					 * Get the spare page first and rescan; after the
					 * restart tmp is set, so this branch is skipped and
					 * the read below is reached.
					 */
					if (!(tmp = __get_free_page(GFP_KERNEL)))
						return -ENOMEM;
					goto repeat;
				}
				read_swap_page(page, (char *) tmp);
				/* install the page only if the entry is still the same */
				if (*ppage == page) {
					*ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
					++p->rss;
					swap_free(page);
					tmp = 0;
				}
				goto repeat;
			}
		}
	}
	/* release the spare page; tmp is 0 here if it was consumed or never obtained */
	free_page(tmp);
	return 0;
}

/* Remove a swap area (i.e. stop swapping to a file or device). */
/*
 * sys_swapoff() - stop swapping to the file or device named by specialfile.
 *
 * Returns -EPERM when suser() fails, the namei() error when the name cannot
 * be resolved, -EINVAL when no writable swap area matches, the
 * try_to_unuse() error when pages could not be brought back in, and 0 on
 * success.
 */
asmlinkage int sys_swapoff(const char * specialfile)
{
	struct swap_info_struct *area;
	struct inode *inode;
	unsigned int idx;
	int err;

	if (!suser())
		return -EPERM;
	err = namei(specialfile,&inode);
	if (err)
		return err;

	/* find the active swap area backed by this inode or device */
	for (idx = 0, area = swap_info; idx < nr_swapfiles; idx++, area++) {
		int match;

		if ((area->flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		if (area->swap_file)
			match = (area->swap_file == inode);
		else
			match = S_ISBLK(inode->i_mode) &&
				area->swap_device == inode->i_rdev;
		if (match)
			break;
	}
	iput(inode);
	if (idx >= nr_swapfiles)
		return -EINVAL;

	/* SWP_USED without SWP_WRITEOK: no new allocations from this area */
	area->flags = SWP_USED;
	/* pull every page of this area back into memory, in all tasks */
	err = try_to_unuse(idx);
	if (err) {
		/* failed: make the area usable again */
		area->flags = SWP_WRITEOK;
		return err;
	}

	/* tear the area down and return the slot to its unused state */
	nr_swap_pages -= area->pages;
	iput(area->swap_file);
	area->swap_file = NULL;
	area->swap_device = 0;
	vfree(area->swap_map);
	area->swap_map = NULL;
	free_page((long) area->swap_lockmap);
	area->swap_lockmap = NULL;
	area->flags = 0;
	return 0;
}

/*
 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 *
 * The swapon system call
 */
/*
 * sys_swapon() - start swapping to the file or block device specialfile.
 *
 * Claims a free swap_info[] slot, reads page 0 of the area into the lockmap
 * page, checks the "SWAP-SPACE" signature in its last 10 bytes and treats
 * the rest as a bitmap of usable pages, from which swap_map[] is built.
 *
 * Returns 0 on success, otherwise a negative errno: -EPERM (not superuser or
 * no free slot), the namei() error, -EBUSY (inode in use or device already a
 * swap area), -ENODEV, -EINVAL (wrong file type, bad signature, no usable
 * page) or -ENOMEM.
 */
asmlinkage int sys_swapon(const char * specialfile)
{
	struct swap_info_struct * p;
	struct inode * swap_inode;
	unsigned int type;
	int i,j;
	int error;

	if (!suser())
		return -EPERM;
	/* find the first swap_info[] slot that is not marked SWP_USED */
	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++)
		if (!(p->flags & SWP_USED))
			break;
	if (type >= MAX_SWAPFILES)
		return -EPERM;
	/* nr_swapfiles is a high-water mark; nothing in this file lowers it */
	if (type >= nr_swapfiles)
		nr_swapfiles = type+1;
	p->flags = SWP_USED;
	p->swap_file = NULL;
	p->swap_device = 0;		/* device number; stays 0 for a swap file */
	p->swap_map = NULL;
	p->swap_lockmap = NULL;		/* bitmap of per-page locks */
	p->lowest_bit = 0;
	p->highest_bit = 0;
	p->max = 1;			/* lets page 0 pass rw_swap_page()'s bound check */
	error = namei(specialfile,&swap_inode);
	if (error)
		goto bad_swap;
	/*
	 * Record the inode right away so that every later error path releases
	 * it through the iput(p->swap_file) at bad_swap. Previously the -EBUSY
	 * and wrong-file-type exits leaked it.
	 */
	p->swap_file = swap_inode;
	error = -EBUSY;
	if (swap_inode->i_count != 1)
		goto bad_swap;
	error = -EINVAL;
	if (S_ISBLK(swap_inode->i_mode)) {
		/* block device: only the device number is kept */
		p->swap_device = swap_inode->i_rdev;
		p->swap_file = NULL;
		iput(swap_inode);
		error = -ENODEV;
		if (!p->swap_device)
			goto bad_swap;
		error = -EBUSY;
		for (i = 0 ; i < nr_swapfiles ; i++) {
			if (i == type)
				continue;
			if (p->swap_device == swap_info[i].swap_device)
				goto bad_swap;
		}
	} else if (!S_ISREG(swap_inode->i_mode))
		goto bad_swap;		/* neither block device nor regular file */
	/* one page of memory holds the lock bitmap */
	p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
	if (!p->swap_lockmap) {
		printk("Unable to start swapping: out of memory :-)\n");
		error = -ENOMEM;
		goto bad_swap;
	}
	read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
	/* the last 10 bytes of page 0 carry the signature, the rest is a bitmap */
	if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
		printk("Unable to find swap-space signature\n");
		error = -EINVAL;
		goto bad_swap;
	}
	memset(p->swap_lockmap+PAGE_SIZE-10,0,10);	/* wipe the signature bytes */
	j = 0;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	/* scan the bitmap: a set bit marks a usable page; bit 0 (page 0) is skipped */
	for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
		if (test_bit(i,p->swap_lockmap)) {
			if (!p->lowest_bit)
				p->lowest_bit = i;
			p->highest_bit = i;
			p->max = i+1;
			j++;
		}
	}
	if (!j) {
		printk("Empty swap-file\n");
		error = -EINVAL;
		goto bad_swap;
	}
	/* swap_map[] needs one byte for each page below max */
	p->swap_map = (unsigned char *) vmalloc(p->max);
	if (!p->swap_map) {
		error = -ENOMEM;
		goto bad_swap;
	}
	for (i = 1 ; i < p->max ; i++) {
		if (test_bit(i,p->swap_lockmap))
			p->swap_map[i] = 0;	/* usable and free */
		else
			p->swap_map[i] = 0x80;	/* bad page, never handed out */
	}
	p->swap_map[0] = 0x80;		/* page 0 holds the header, never handed out */
	memset(p->swap_lockmap,0,PAGE_SIZE);	/* from now on the page is a lock bitmap */
	p->flags = SWP_WRITEOK;
	p->pages = j;
	nr_swap_pages += j;
	/* j << 2 is j * 4; presumably 4 kB pages, giving kilobytes */
	printk("Adding Swap: %dk swap-space\n",j<<2);
	return 0;
bad_swap:
	free_page((long) p->swap_lockmap);
	vfree(p->swap_map);
	iput(p->swap_file);
	p->swap_device = 0;
	p->swap_file = NULL;
	p->swap_map = NULL;
	p->swap_lockmap = NULL;
	p->flags = 0;
	return error;
}

/*
 * si_swapinfo() - fill in the swap totals of a struct sysinfo.
 *
 * val: receives freeswap and totalswap, both in bytes (page counts shifted
 *      left by PAGE_SHIFT).
 *
 * Every swap_map[] entry of every SWP_USED area is examined: 128 (bad page)
 * is skipped, 0 counts as free and as total, anything else counts as total.
 */
void si_swapinfo(struct sysinfo *val)
{
	unsigned int i, j;

	val->freeswap = val->totalswap = 0;
	for (i = 0; i < nr_swapfiles; i++) {
		if (!(swap_info[i].flags & SWP_USED))
			continue;
		for (j = 0; j < swap_info[i].max; ++j)
			switch (swap_info[i].swap_map[j]) {
			case 128:	/* bit 7 set: bad page */
				continue;
			case 0:		/* free page */
				++val->freeswap;
				/* fall through: a free page is part of the total too */
			default:
				++val->totalswap;
			}
	}
	val->freeswap <<= PAGE_SHIFT;
	val->totalswap <<= PAGE_SHIFT;
	return;
}
/* Document URL: http://blogimg.chinaunix.net/blog/upfile2/090318150923.pdf */
/********************************************
*Created By: 陶治江
*Date: 2009-3-16
********************************************/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>

#include <asm/system.h> /* for cli()/sti() */
#include <asm/bitops.h>

/* Size of swap_info[]: one slot per active swap file or swap device. */
#define MAX_SWAPFILES 8

/* Values for swap_info_struct.flags; SWP_WRITEOK contains the SWP_USED bit. */
#define SWP_USED	1
#define SWP_WRITEOK	3

/*
 * Swap entry encoding used by these macros: the swap area index ("type")
 * lives in bits 1..7, the page offset inside that area lives above
 * PAGE_SHIFT.
 */
#define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
#define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))

/* Number of swap_info[] slots that have ever been handed out. */
static int nr_swapfiles = 0;
/* Tasks sleeping until a per-page swap lock bit is released. */
static struct wait_queue * lock_queue = NULL;

static struct swap_info_struct {
	unsigned long flags;		/* 0, SWP_USED or SWP_WRITEOK */
	struct inode * swap_file;	/* inode when swapping to a regular file */
	unsigned int swap_device;	/* device number when swapping to a block device */
	unsigned char * swap_map;	/* per-page use count; 0 = free, 0x80 = unusable */
	unsigned char * swap_lockmap;	/* one lock bit per swap page */
	int pages;			/* number of usable pages found at swapon time */
	int lowest_bit;			/* lower bound of the free-slot search window */
	int highest_bit;		/* upper bound of the free-slot search window */
	unsigned long max;		/* highest usable page index + 1 */
} swap_info[MAX_SWAPFILES];
/*
 * MAX_SWAPFILES is 8 and every swap area needs its own swap_info_struct,
 * so at most 8 swap areas can be active at once.
 */

extern unsigned long free_page_list;
extern int shm_swap (int);

/*
 * The following are used to make sure we don't thrash too much...
 * NOTE!! NR_LAST_FREE_PAGES must be a power of 2...
 */
#define NR_LAST_FREE_PAGES 32
static unsigned long last_free_pages[NR_LAST_FREE_PAGES] = {0,};

/* What follows is the low-level routine behind the swap read/write calls. */
void rw_swap_page(int rw, unsigned long entry, char * buf)
{
unsigned long type, offset;
struct swap_info_struct * p; type = SWP_TYPE(entry);
if (type >= nr_swapfiles) {
printk("Internal error: bad swap-device\n");
return;
}
p = &swap_info[type];
offset = SWP_OFFSET(entry);
if (offset >= p->max) {
printk("rw_swap_page: weirdness\n");
return;
}
if (!(p->flags & SWP_USED)) {
printk("Trying to swap to unused swap-device\n");
return;
}
while (set_bit(offset,p->swap_lockmap)) //加锁
sleep_on(&lock_queue); //等待,又是一个睡眠队列
//内核状态的统计,读入
if (rw == READ)
kstat.pswpin++;
else //写出
kstat.pswpout++;
if (p->swap_device) { //有交换设备,就是设备交换了
ll_rw_page(rw,p->swap_device,offset,buf); //调用底层的读写函数
} else if (p->swap_file) { //否则就是文件交换
unsigned int zones[8];
unsigned int block;
int i, j; block = offset << (12 - p->swap_file->i_sb->s_blocksize_bits); //单位转换? for (i=0, j=0; j< PAGE_SIZE ; i++, j +=p->swap_file->i_sb->s_blocksize)
if (!(zones[i] = bmap(p->swap_file,block++))) //转换为底层的文件信息了吧
{
printk("rw_swap_page: bad swap file\n");
return;
}
ll_rw_swap_file(rw,p->swap_file->i_dev, zones, i,buf);
} else
printk("re_swap_page: no swap file or device\n");
if (offset && !clear_bit(offset,p->swap_lockmap)) //解锁
printk("rw_swap_page: lock already cleared\n");
wake_up(&lock_queue);
} //进行内存的交换,就是从交换设备中获得空闲的页面,返回的
//是构造产生的entry
/*
 * get_swap_page() - reserve a free page slot in some writable swap area.
 *
 * Scans every swap area whose flags equal SWP_WRITEOK, looking between
 * lowest_bit and highest_bit for a swap_map[] entry that is 0. The slot
 * found gets use count 1 and nr_swap_pages is decremented.
 *
 * Returns the SWP_ENTRY() value for the slot, or 0 when nothing is free.
 */
unsigned int get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned int offset, type;

	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++) {
		/* only areas that are fully set up may be allocated from */
		if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
			if (p->swap_map[offset])	/* non-zero: in use or unusable */
				continue;
			p->swap_map[offset] = 1;
			nr_swap_pages--;
			/* the top slot of the window was taken: shrink the window */
			if (offset == p->highest_bit)
				p->highest_bit--;
			/* everything below this slot was found busy: start here next time */
			p->lowest_bit = offset;
			return SWP_ENTRY(type,offset);
		}
	}
	return 0;
}

/*
 * swap_duplicate() - add one reference to an already allocated swap slot.
 *
 * entry: swap entry to duplicate; 0 is accepted and returns 0.
 *
 * Entries of type SHM_SWP_TYPE are returned untouched. For other entries the
 * swap_map[] use count is incremented (swap_map[] is a byte array precisely
 * because it holds a reference count, so one slot can be shared).
 *
 * Returns entry on success, 0 when the entry is invalid or the slot unused.
 *
 * NOTE(review): the count is a single byte and is incremented unchecked.
 */
unsigned long swap_duplicate(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return 0;
	offset = SWP_OFFSET(entry);
	type = SWP_TYPE(entry);
	if (type == SHM_SWP_TYPE)
		return entry;
	if (type >= nr_swapfiles) {
		printk("Trying to duplicate nonexistent swap-page\n");
		return 0;
	}
	p = type + swap_info;
	if (offset >= p->max) {
		/* message used to say "swap_free", pointing at the wrong function */
		printk("swap_duplicate: weirdness\n");
		return 0;
	}
	if (!p->swap_map[offset]) {	/* slot is not in use */
		printk("swap_duplicate: trying to duplicate unused page\n");
		return 0;
	}
	p->swap_map[offset]++;
	return entry;
}

/* Release one page on a swap device; entry is the constructed swap entry. */
/*
 * swap_free() - drop one reference to a swap slot.
 *
 * entry: swap entry to release; 0 and SHM_SWP_TYPE entries are ignored.
 *
 * While holding the slot's lock bit, the free-slot search window is widened
 * to include the slot and its swap_map[] count is decremented. When the
 * count reaches 0, nr_swap_pages is incremented.
 */
void swap_free(unsigned long entry)
{
	struct swap_info_struct *area;
	unsigned long slot, idx;

	if (!entry)
		return;

	idx = SWP_TYPE(entry);
	if (idx == SHM_SWP_TYPE)
		return;
	if (idx >= nr_swapfiles) {
		printk("Trying to free nonexistent swap-page\n");
		return;
	}

	area = swap_info + idx;
	slot = SWP_OFFSET(entry);
	if (slot >= area->max) {
		printk("swap_free: weirdness\n");
		return;
	}
	if (!(area->flags & SWP_USED)) {
		printk("Trying to free swap from unused swap-device\n");
		return;
	}

	/* wait for the per-page lock */
	while (set_bit(slot, area->swap_lockmap))
		sleep_on(&lock_queue);

	/* make sure the allocator's search window covers this slot again */
	if (slot < area->lowest_bit)
		area->lowest_bit = slot;
	if (slot > area->highest_bit)
		area->highest_bit = slot;

	/* a count of 0 here means the map and the caller disagree */
	if (area->swap_map[slot]) {
		if (--area->swap_map[slot] == 0)
			nr_swap_pages++;
	} else {
		printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
	}

	if (!clear_bit(slot, area->swap_lockmap))
		printk("swap_free: lock already cleared\n");
	wake_up(&lock_queue);
}

/*
 * Bring a page from the swap device back into physical memory.
 * table_ptr is presumably a pointer to the page-table entry.
 */
/*
 * swap_in() - replace the swap entry stored in *table_ptr by a page in memory.
 *
 * table_ptr: location holding a non-present entry that encodes a swap slot.
 *
 * SHM_SWP_TYPE entries are handed to shm_no_page(). Otherwise a fresh page
 * is obtained and filled through read_swap_page(). If *table_ptr still holds
 * the same entry afterwards, the new page is installed with PAGE_DIRTY and
 * PAGE_PRIVATE and the swap slot is released; if it changed meanwhile, the
 * new page is freed again.
 */
void swap_in(unsigned long *table_ptr)
{
	unsigned long swp = *table_ptr;
	unsigned long frame;

	if (swp & PAGE_PRESENT) {
		printk("trying to swap in present page\n");
		return;
	}
	if (!swp) {
		printk("No swap page in swap_in\n");
		return;
	}
	if (SWP_TYPE(swp) == SHM_SWP_TYPE) {
		shm_no_page ((unsigned long *) table_ptr);
		return;
	}

	/* get a page of physical memory to read into */
	frame = get_free_page(GFP_KERNEL);
	if (frame) {
		read_swap_page(swp, (char *) frame);
	} else {
		oom(current);
		frame = BAD_PAGE;
	}

	if (*table_ptr == swp) {
		*table_ptr = frame | (PAGE_DIRTY | PAGE_PRIVATE);
		swap_free(swp);		/* the swap slot is no longer needed */
		return;
	}
	/* the entry changed while we were busy: discard our copy */
	free_page(frame);
}

/* Move the contents of a page of memory out to the swap device. */
/*
 * try_to_swap_out() - try to evict the page mapped by one page-table entry.
 *
 * table_ptr: pointer to the page-table entry to examine.
 *
 * Returns 0 when nothing was evicted: page not present, beyond high_memory,
 * reserved, recently accessed (the accessed bit is cleared instead), listed
 * in last_free_pages[], dirty but shared, or no swap slot available.
 * Returns 1 when a dirty page was written to swap and its frame released.
 * For a clean page the mapping is simply dropped and the result is
 * 1 + the frame's remaining mem_map[] count.
 */
static inline int try_to_swap_out(unsigned long * table_ptr)
{
	unsigned long pte = *table_ptr;
	unsigned long frame;
	unsigned long swp;
	int k;

	if (!(pte & PAGE_PRESENT))
		return 0;
	if (pte >= high_memory)
		return 0;
	/* reserved memory is never swapped */
	if (mem_map[MAP_NR(pte)] & MAP_PAGE_RESERVED)
		return 0;
	if (pte & PAGE_ACCESSED) {
		/* recently used: clear the bit and give the page another round */
		*table_ptr &= ~PAGE_ACCESSED;
		return 0;
	}

	frame = pte & PAGE_MASK;

	/* do not evict pages that were handed out only recently */
	k = 0;
	while (k < NR_LAST_FREE_PAGES) {
		if (last_free_pages[k] == frame)
			return 0;
		k++;
	}

	if (pte & PAGE_DIRTY) {
		/* a dirty page has to be written out; shared frames are left alone */
		if (mem_map[MAP_NR(frame)] != 1)
			return 0;
		swp = get_swap_page();
		if (!swp)
			return 0;
		*table_ptr = swp;
		invalidate();
		write_swap_page(swp, (char *) frame);
		free_page(frame);	/* the physical page can go now */
		return 1;
	}

	/* clean page: the entry is zeroed and no swap slot is recorded */
	*table_ptr = 0;
	invalidate();
	free_page(frame);
	return 1 + mem_map[MAP_NR(frame)];
}

/*
 * sys_idle() does nothing much: it just searches for likely candidates for
 * swapping out or forgetting about. This speeds up the search when we
 * actually have to swap.
 */
asmlinkage int sys_idle(void)
{
	/* The body visible here only requests a reschedule and reports success. */
	need_resched = 1;
	return 0;
}

/*
 * A new implementation of swap_out(). We do not swap complete processes,
 * but only a small number of blocks, before we continue with the next
 * process. The number of blocks actually swapped is determined on the
 * number of page faults, that this process actually had in the last time,
 * so we won't swap heavily used processes all the time ...
 *
 * Note: the priority argument is a hint on much CPU to waste with the
 * swap block search, not a hint, of how much blocks to swap with
 * each process.
 *
 * (C) 1993 Kai Petzke, [email protected]
 */
#ifdef NEW_SWAP
/*
* These are the miminum and maximum number of pages to swap from one process,
* before proceeding to the next:
*/
#define SWAP_MIN 4
#define SWAP_MAX 32 /*
* The actual number of pages to swap is determined as:
* SWAP_RATIO / (number of recent major page faults)
*/
#define SWAP_RATIO 128 //这里的priority可能是用于产生下面交换出去更有效的
//算法的,从函数被调用的角度来看priority是不断变大的
//而priority的变化将直接影响到counter的计数
static int swap_out(unsigned int priority)
{
static int swap_task; //静态变量!!!
int table;
int page;
long pg_table;
int loop;
int counter = NR_TASKS * 2 >> priority;
struct task_struct *p; counter = NR_TASKS * 2 >> priority;
for(; counter >= 0; counter--, swap_task++) {
/*
* Check that swap_task is suitable for swapping. If not, look for
* the next suitable process.
*/
loop = 0;
while(1) {
if(swap_task >= NR_TASKS) {
swap_task = 1;
if(loop) //表示这个大循环已经走过一遍了!!
/* all processes are unswappable or already swapped out */
return 0;
loop = 1;
} p = task[swap_task];
if(p && p->swappable && p->rss) //rss内存页面,不是被保留的,说明可能被交换出去的
break; swap_task++;
} /*
* Determine the number of pages to swap from this process.
*/
if(! p -> swap_cnt) { //number of pages to swap on next pass
p->dec_flt = (p->dec_flt * 3) / 4 + p->maj_flt - p->old_maj_flt;
p->old_maj_flt = p->maj_flt; if(p->dec_flt >= SWAP_RATIO / SWAP_MIN) {
p->dec_flt = SWAP_RATIO / SWAP_MIN;
p->swap_cnt = SWAP_MIN;
} else if(p->dec_flt <= SWAP_RATIO / SWAP_MAX)
p->swap_cnt = SWAP_MAX;
else
p->swap_cnt = SWAP_RATIO / p->dec_flt;
} /*
* Go through process' page directory.
*/
for(table = p->swap_table; table < 1024; table++)
{
pg_table = ((unsigned long *) p->tss.cr3)[table]; //获得页目录项
if(pg_table >= high_memory)
continue;
if(mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
continue;
if(!(PAGE_PRESENT & pg_table)) { //无效的页目录项,why it is bad?can not be 0?
printk("swap_out: bad page-table at pg_dir[%d]: %08lx\n",
table, pg_table);
((unsigned long *) p->tss.cr3)[table] = 0;
continue;
}
pg_table &= 0xfffff000; /*
* Go through this page table.
*/
for(page = p->swap_page; page < 1024; page++)
{
switch(try_to_swap_out(page + (unsigned long *) pg_table)) //晕,原来页目录项这里转换!
{
case 0:
break;
case 1: //this indicate sccuess@!
p->rss--; //的确这里递减了
/* continue with the following page the next time */
p->swap_table = table;
p->swap_page = page + 1;
if((--p->swap_cnt) == 0)
swap_task++;
return 1;
default: //表示直接释放了内存,没有实际写入到交换设备中的
p->rss--;
break;
}
} p->swap_page = 0;
} /*
* Finish work with this process, if we reached the end of the page
* directory. Mark restart from the beginning the next time.
*/
p->swap_table = 0; //这么说p->swap_table不是一个很神秘的变量啊?
}
return 0;
} #else /* old swapping procedure */ /*
* Go through the page tables, searching for a user page that
* we can swap out.
*
* We now check that the process is swappable (normally only 'init'
* is un-swappable), allowing high-priority processes which cannot be
* swapped out (things like user-level device drivers (Not implemented)).
*/
static int swap_out(unsigned int priority)
{
static int swap_task = 1;
static int swap_table = 0;
static int swap_page = 0;
int counter = NR_TASKS*8;
int pg_table;
struct task_struct * p; counter >>= priority;
check_task:
if (counter-- < 0)
return 0;
if (swap_task >= NR_TASKS) {
swap_task = 1;
goto check_task;
}
p = task[swap_task];
if (!p || !p->swappable) {
swap_task++;
goto check_task;
}
check_dir:
if (swap_table >= PTRS_PER_PAGE) {
swap_table = 0;
swap_task++;
goto check_task;
}
pg_table = ((unsigned long *) p->tss.cr3)[swap_table];
if (pg_table >= high_memory || (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)) {
swap_table++;
goto check_dir;
}
if (!(PAGE_PRESENT & pg_table)) {
printk("bad page-table at pg_dir[%d]: %08x\n",
swap_table,pg_table);
((unsigned long *) p->tss.cr3)[swap_table] = 0;
swap_table++;
goto check_dir;
}
pg_table &= PAGE_MASK;
check_table:
if (swap_page >= PTRS_PER_PAGE) {
swap_page = 0;
swap_table++;
goto check_dir;
}
switch (try_to_swap_out(swap_page + (unsigned long *) pg_table)) {
case 0: break;
case 1: p->rss--; return 1;
default: p->rss--;
}
swap_page++;
goto check_table;
} #endif static int try_to_free_page(void)
{
int i=6; while (i--) {
if (shrink_buffers(i))
return 1;
if (shm_swap(i))
return 1;
if (swap_out(i))
return 1;
}
return 0;
} /*
* Note that this must be atomic, or bad things will happen when
* pages are requested in interrupts (as malloc can do). Thus the
* cli/sti's.
*/
/*
 * add_mem_queue() - push a page onto the front of a page list.
 * @addr:  any address inside the page; it is rounded down with PAGE_MASK.
 * @queue: list head, holding the address of the first page on the list.
 *
 * The link to the next page is stored in the first word of the page
 * itself. This helper does not touch the interrupt flag on its own.
 */
static inline void add_mem_queue(unsigned long addr, unsigned long * queue)
{
	unsigned long *link = (unsigned long *) (addr & PAGE_MASK);

	*link = *queue;			/* new page points at the old head */
	*queue = (unsigned long) link;	/* ... and becomes the new head */
}

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 * - select the "normal" case and put it inside the if () { XXX }
 * - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */
/*
 * free_page() - drop one reference to the page containing @addr.
 * @addr: address of the page; addresses at or above high_memory are
 *        silently ignored.
 *
 * Pages whose mem_map entry carries MAP_PAGE_RESERVED are left alone.
 * When the use count reaches zero the page is queued on the secondary
 * list while that list holds fewer than MAX_SECONDARY_PAGES pages, and on
 * the free list otherwise. Freeing a page whose count is already zero
 * only prints a diagnostic.
 *
 * The nesting below deliberately keeps the normal case inside the if
 * bodies and avoids else branches.
 */
void free_page(unsigned long addr)
{
	if (addr < high_memory) {
		unsigned short *use_count = mem_map + MAP_NR(addr);

		if (*use_count != 0) {		/* page is in use */
			if (!(*use_count & MAP_PAGE_RESERVED)) {
				unsigned long saved_flags;

				/* Count and list updates run with interrupts off. */
				save_flags(saved_flags);
				cli();
				if (--*use_count == 0) {	/* last user is gone */
					if (nr_secondary_pages < MAX_SECONDARY_PAGES) {
						/* Secondary list is not full yet: refill it first. */
						add_mem_queue(addr, &secondary_page_list);
						nr_secondary_pages++;
						restore_flags(saved_flags);
						return;
					}
					add_mem_queue(addr, &free_page_list);
					nr_free_pages++;
				}
				restore_flags(saved_flags);
			}
			return;
		}
		printk("Trying to free free memory (%08lx): memory probabably corrupted\n",addr);
		/*
		 * Prints the word stored just below the argument on the stack;
		 * presumably the caller's return address - relies on the
		 * calling convention.
		 */
		printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
		return;
	}
}

/*
 * This is one ugly macro, but it simplifies checking, and makes
 * this speed-critical place reasonably fast, especially as we have
 * to do things with the interrupt flag etc.
 *
 * Note that this #define is heavily optimized to give fast code
 * for the normal case - the if-statements are ordered so that gcc-2.2.2
 * will make *no* jumps for the normal code. Don't touch unless you
 * know what you are doing.
 */
/*
 * REMOVE_FROM_MEM_QUEUE(queue, nr) - pop the first page off a page list
 * and return it from the ENCLOSING FUNCTION.
 *
 * This is a macro, not a function: it expands in place and relies on the
 * caller having the locals "result", "flag" (already filled in by
 * save_flags()) and "index" in scope. "queue" (list head) and "nr" (the
 * matching page counter) must be lvalues; both are updated.
 *
 * Behaviour, with interrupts disabled by cli():
 *  - head is page aligned, below high_memory and its mem_map entry is 0:
 *    unlink it, set mem_map to 1 (in use), decrement nr, record the page
 *    in last_free_pages[], restore the flags and "return result;".
 *  - head is misaligned / out of range, or its mem_map entry is not 0:
 *    print a diagnostic and reset the list to empty (queue = 0, nr = 0).
 *  - list is empty but nr is not 0: print a diagnostic and zero nr.
 * In every non-returning case the flags are restored and execution falls
 * through to the statement after the macro. The expansion has no trailing
 * semicolon; the caller supplies it.
 *
 * Keep all remarks in this header: a line continuation only works when
 * the backslash is the very last character on the line, so nothing (not
 * even a line comment) may follow it inside the macro body.
 */
#define REMOVE_FROM_MEM_QUEUE(queue,nr) \
	cli(); \
	if ((result = queue) != 0) { \
		if (!(result & ~PAGE_MASK) && result < high_memory) { \
			queue = *(unsigned long *) result; \
			if (!mem_map[MAP_NR(result)]) { \
				mem_map[MAP_NR(result)] = 1; \
				nr--; \
				last_free_pages[index = (index + 1) & (NR_LAST_FREE_PAGES - 1)] = result; \
				restore_flags(flag); \
				return result; \
			} \
			printk("Free page %08lx has mem_map = %d\n", \
				result,mem_map[MAP_NR(result)]); \
		} else \
			printk("Result = 0x%08lx - memory map destroyed\n", result); \
		queue = 0; \
		nr = 0; \
	} else if (nr) { \
		printk(#nr " is %d, but " #queue " is empty\n",nr); \
		nr = 0; \
	} \
	restore_flags(flag)

/*
 * Get physical address of first (actually last :-) free page, and mark it
 * used. If no free pages left, return 0.
 *
 * Note that this is one of the most heavily called functions in the kernel,
 * so it's a bit timing-critical (especially as we have to disable interrupts
 * in it). See the above macro which does most of the work, and which is
 * optimized for a fast normal path of execution.
 */
/* Obtain a free physical page and mark it as in use. */
/*
 * __get_free_page() - take one page off the free lists.
 * @priority: GFP_* request class.
 *            GFP_BUFFER: only the free list is tried, nothing is reclaimed.
 *            GFP_ATOMIC: no reclaim; the secondary list is the fallback.
 *            anything else: try_to_free_page() is called and the free
 *            list retried for as long as reclaim keeps succeeding.
 *
 * Returns the address of the page, or 0 when none could be obtained.
 * A successful pop returns from inside REMOVE_FROM_MEM_QUEUE.
 */
unsigned long __get_free_page(int priority)
{
	extern unsigned long intr_count;
	/*
	 * "result", "flag" and "index" are referenced by name inside
	 * REMOVE_FROM_MEM_QUEUE - do not rename them.
	 */
	unsigned long result, flag;
	static unsigned long index = 0;

	/* this routine can be called at interrupt time via
	   malloc. We want to make sure that the critical
	   sections of code have interrupts disabled. -RAB
	   Is this code reentrant? */

	if (intr_count && priority != GFP_ATOMIC) {
		/*
		 * Called from interrupt context with a non-atomic priority:
		 * complain and force GFP_ATOMIC. The printed value is the word
		 * just below the argument on the stack (presumably the return
		 * address of the caller).
		 */
		printk("gfp called nonatomically from interrupt %08lx\n",
			((unsigned long *)&priority)[-1]);
		priority = GFP_ATOMIC;
	}
	save_flags(flag);
	for (;;) {
		REMOVE_FROM_MEM_QUEUE(free_page_list,nr_free_pages);
		if (priority == GFP_BUFFER)
			return 0;
		if (priority == GFP_ATOMIC)
			break;
		if (!try_to_free_page())
			break;
		/* Something was reclaimed: look at the free list again. */
	}
	REMOVE_FROM_MEM_QUEUE(secondary_page_list,nr_secondary_pages);
	return 0;
}

/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares?
 */
/*
 * Annotator's note: there are a lot of gotos below. Without outside
 * interference a plain set of nested loops might be enough, but this is an
 * operating system, so restarting the scan like this is the safer choice.
 */
static int try_to_unuse(unsigned int type)
{
int nr, pgt, pg;
unsigned long page, *ppage;
unsigned long tmp = 0;
struct task_struct *p; nr = 0;
/*
* When we have to sleep, we restart the whole algorithm from the same
* task we stopped in. That at least rids us of all races.
*/
repeat:
for (; nr < NR_TASKS ; nr++)
{
p = task[nr];
if (!p)
continue;
for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++)
{
ppage = pgt + ((unsigned long *) p->tss.cr3);
page = *ppage;
if (!page)
continue;
if (!(page & PAGE_PRESENT) || (page >= high_memory))
continue;
if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
continue;
ppage = (unsigned long *) (page & PAGE_MASK);
for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++)
{ //页表了
page = *ppage;
if (!page)
continue;
if (page & PAGE_PRESENT)
continue;
if (SWP_TYPE(page) != type)
continue;
if (!tmp) { //goto 之后就不执行这个了,继续read_swap_page
if (!(tmp = __get_free_page(GFP_KERNEL)))
return -ENOMEM;
goto repeat;
}
read_swap_page(page, (char *) tmp);
if (*ppage == page) {
*ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
++p->rss;
swap_free(page);
tmp = 0;
}
goto repeat;
}
}
}
free_page(tmp);
return 0;
} //删除交换分区(交换文件卸载吧)
/*
 * sys_swapoff() - stop swapping to the file or block device named by
 * @specialfile.
 *
 * Returns 0 on success, -EPERM when the caller is not the superuser, the
 * namei() error when the path cannot be resolved, -EINVAL when the name
 * does not match an active swap area, or the error from try_to_unuse().
 */
asmlinkage int sys_swapoff(const char * specialfile)
{
	struct swap_info_struct *area;
	struct inode *victim;
	unsigned int type;
	int err;

	if (!suser())
		return -EPERM;
	err = namei(specialfile,&victim);
	if (err)
		return err;

	/* Look for the fully active swap area that matches the inode. */
	for (type = 0, area = swap_info; type < nr_swapfiles; type++, area++) {
		if ((area->flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		if (area->swap_file) {
			/* Swap file: compare inodes. */
			if (area->swap_file == victim)
				break;
		} else if (S_ISBLK(victim->i_mode) &&
			   area->swap_device == victim->i_rdev) {
			/* Swap device: compare device numbers. */
			break;
		}
	}
	iput(victim);
	if (type >= nr_swapfiles)
		return -EINVAL;

	/* Keep the slot marked as used, but no longer write-ok, while emptying it. */
	area->flags = SWP_USED;
	/* Pull every page that lives in this area back into memory. */
	err = try_to_unuse(type);
	if (err) {
		/* Failed: put the area back into service. */
		area->flags = SWP_WRITEOK;
		return err;
	}

	/* Release everything and return the slot to its unused state. */
	nr_swap_pages -= area->pages;
	iput(area->swap_file);
	area->swap_file = NULL;
	area->swap_device = 0;
	vfree(area->swap_map);
	area->swap_map = NULL;
	free_page((long) area->swap_lockmap);
	area->swap_lockmap = NULL;
	area->flags = 0;
	return 0;
}

/*
 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 *
 * The swapon system call
 */
asmlinkage int sys_swapon(const char * specialfile)
{
struct swap_info_struct * p;
struct inode * swap_inode;
unsigned int type;
int i,j;
int error; if (!suser())
return -EPERM;
//从swap_info寻找出空闲的swap_info_struct结构
p = swap_info;
for (type = 0 ; type < nr_swapfiles ; type++,p++)
if (!(p->flags & SWP_USED))
break;
if (type >= MAX_SWAPFILES)
return -EPERM;
//全局变量nr_swapfiles记录了系统曾经使用数组swap_info的最多项
//该值从不降低!!
if (type >= nr_swapfiles)
nr_swapfiles = type+1;
p->flags = SWP_USED;
p->swap_file = NULL;
p->swap_device = 0; //交换设备号,对交换文件,值为0
p->swap_map = NULL;
p->swap_lockmap = NULL; //一个位图,用于进行锁定的标识
p->lowest_bit = 0; //交换设备中第一个和最后一个可用页面的位置
p->highest_bit = 0;
p->max = 1; //交换设备最大页面数
error = namei(specialfile,&swap_inode);
if (error)
goto bad_swap;
error = -EBUSY;
if (swap_inode->i_count != 1)
goto bad_swap;
error = -EINVAL;
if (S_ISBLK(swap_inode->i_mode)) //块设备文件
{
p->swap_device = swap_inode->i_rdev;
iput(swap_inode);
error = -ENODEV;
if (!p->swap_device)
goto bad_swap;
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++)
{
if (i == type)
continue;
if (p->swap_device == swap_info[i].swap_device)
goto bad_swap;
}
} else if (S_ISREG(swap_inode->i_mode)) //普通文件
p->swap_file = swap_inode;
else
goto bad_swap;
//呃,为swap_lockmap分配了一页的内存
p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
if (!p->swap_lockmap) {
printk("Unable to start swapping: out of memory :-)\n");
error = -ENOMEM;
goto bad_swap;
}
//#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
//最后的10个字节是魔数,其余全部是位图,这里说明是第一个版本的
//交换设备。这里比较失败 ~~~
printk("Unable to find swap-space signature\n");
error = -EINVAL;
goto bad_swap;
}
memset(p->swap_lockmap+PAGE_SIZE-10,0,10); //最后的10个字节清零
j = 0;
p->lowest_bit = 0;
p->highest_bit = 0;
//进行整个位图的搜索,然后进行lowest_bit等变量的设置
for (i = 1 ; i < 8*PAGE_SIZE ; i++)
{
if (test_bit(i,p->swap_lockmap)) //呃,这个函数是bt sbb操作来将CF结果反映出来的
//置位,才有用的
{
if (!p->lowest_bit)
p->lowest_bit = i; //交换设备的第一个页有特殊的用处,所以lowest_bit肯定不会是0
p->highest_bit = i;
p->max = i+1; //交换设备最大页面数
j++;
}
}
if (!j) {
printk("Empty swap-file\n");
error = -EINVAL;
goto bad_swap;
}
//根据p->max来申请一块内存,用于建立数组swap_map
p->swap_map = (unsigned char *) vmalloc(p->max);
if (!p->swap_map) {
error = -ENOMEM;
goto bad_swap;
}
for (i = 1 ; i < p->max ; i++) {
if (test_bit(i,p->swap_lockmap)) //置位
p->swap_map[i] = 0;
else
p->swap_map[i] = 0x80; //坏页,不能被使用
}
p->swap_map[0] = 0x80; //设置为坏页,因为它不能被挪作他用了
memset(p->swap_lockmap,0,PAGE_SIZE); //全部复位
p->flags = SWP_WRITEOK;
p->pages = j;
nr_swap_pages += j; //更新系统全局信息
printk("Adding Swap: %dk swap-space\n",j<<2); //<<2 ???
return 0;
bad_swap:
free_page((long) p->swap_lockmap);
vfree(p->swap_map);
iput(p->swap_file);
p->swap_device = 0;
p->swap_file = NULL;
p->swap_map = NULL;
p->swap_lockmap = NULL;
p->flags = 0;
return error;
} void si_swapinfo(struct sysinfo *val)
{
unsigned int i, j; val->freeswap = val->totalswap = 0;
for (i = 0; i < nr_swapfiles; i++) //原来nr_swapfiles是这里用的
{
if (!(swap_info[i].flags & SWP_USED))
continue;
for (j = 0; j < swap_info[i].max; ++j) //对每个页面进行统计了
switch (swap_info[i].swap_map[j])
{
case 128: //BIT7,坏的
continue;
case 0: //0 表示空闲的
++val->freeswap;
default:
++val->totalswap;
}
}
val->freeswap <<= PAGE_SHIFT;
val->totalswap <<= PAGE_SHIFT;
return;
}
文档地址:http://blogimg.chinaunix.net/blog/upfile2/090318150923.pdf
相关阅读 更多 +