Linux 1.0 Kernel Annotated: linux/kernel/fork.c
Date: 2009-03-08    Source: taozhijiangscu
/********************************************
*Created By: 陶治江
*Date: 2009-3-6
********************************************/
// An operating system touches far too many things -- that is exactly where its
// complexity lies: file systems, memory management... none of them is simple,
// and each will have to be worked through later.
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/segment.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/ldt.h>

#include <asm/segment.h>
#include <asm/system.h>

asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call");

/* These should maybe be in <linux/tasks.h> */
//#define NR_TASKS 128
#define MAX_TASKS_PER_USER (NR_TASKS/2)
#define MIN_TASKS_LEFT_FOR_ROOT 4

extern int shm_fork(struct task_struct *, struct task_struct *);

long last_pid=0;

// Find a free task-slot number and, at the same time, a free process ID, which is
// left in last_pid. As the algorithm shows, the search always resumes just after
// the previously used last_pid.
static int find_empty_process(void)
{
int free_task;
int i, tasks_free;
int this_user_tasks;

repeat:
if ((++last_pid) & 0xffff8000) // keep PIDs in 1..32767 (15 bits); PID 0 belongs to the idle task
last_pid=1;
this_user_tasks = 0;
tasks_free = 0;
free_task = -EAGAIN;
i = NR_TASKS;
//遍历所有的任务槽
while (--i > 0) {
if (!task[i]) { // good, this slot is free
free_task = i;
tasks_free++;
continue;
}
if (task[i]->uid == current->uid) // count how many tasks the calling user already owns
this_user_tasks++;
// this last_pid is already in use as a pid, pgrp or session id -- try the next one
if (task[i]->pid == last_pid || task[i]->pgrp == last_pid ||
task[i]->session == last_pid)
goto repeat;
}
if (tasks_free <= MIN_TASKS_LEFT_FOR_ROOT ||
this_user_tasks > MAX_TASKS_PER_USER)
if (current->uid) // only non-root callers are refused; root may use the reserved slots
return -EAGAIN;
return free_task;
}
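
To make the effect of the (++last_pid) & 0xffff8000 test concrete, here is a minimal user-space sketch (not kernel code) of the same search; is_pid_in_use() is a made-up stand-in for the scan over task[i]->pid/pgrp/session. Any candidate with bit 15 or above set is folded back to 1, so allocated PIDs always land in 1..32767:

#include <stdio.h>

static long last_pid = 0;

/* hypothetical stand-in for the scan over task[i]->pid/pgrp/session */
static int is_pid_in_use(long pid)
{
	return pid == 5 || pid == 6;	/* pretend PIDs 5 and 6 are taken */
}

static long next_pid(void)
{
	do {
		if ((++last_pid) & 0xffff8000)	/* any bit >= 2^15 set: wrap around */
			last_pid = 1;
	} while (is_pid_in_use(last_pid));
	return last_pid;
}

int main(void)
{
	last_pid = 32766;
	for (int i = 0; i < 5; i++)
		printf("%ld\n", next_pid());	/* prints 32767 1 2 3 4 */
	return 0;
}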

// Copy a file descriptor. This will probably become clearer in the file-system code.
static struct file * copy_fd(struct file * old_file)
{
struct file * new_file = get_empty_filp();
int error;

if (new_file) {
memcpy(new_file,old_file,sizeof(struct file));
new_file->f_count = 1;
if (new_file->f_inode)
new_file->f_inode->i_count++;
if (new_file->f_op && new_file->f_op->open) {
error = new_file->f_op->open(new_file->f_inode,new_file);
if (error) {
iput(new_file->f_inode);
new_file->f_count = 0;
new_file = NULL;
}
}
}
return new_file;
}
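
As a side note on what copy_fd buys you: a plain fork() takes the f->f_count++ branch in sys_fork below, so parent and child end up sharing one struct file and therefore one file offset. The hedged user-space example below (ordinary POSIX calls, nothing kernel-specific; it assumes the opened file has at least four bytes) shows that shared offset. The COPYFD/copy_fd path would instead give the child its own struct file and an independent offset.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>

int main(void)
{
	char buf[4];
	int fd = open("/etc/hostname", O_RDONLY);	/* any readable file works */

	if (fd < 0)
		exit(1);
	if (fork() == 0) {				/* child: consume 4 bytes */
		if (read(fd, buf, sizeof(buf)) < 0)
			_exit(1);
		_exit(0);
	}
	wait(NULL);
	printf("parent offset after child read: %ld\n",
	       (long) lseek(fd, 0, SEEK_CUR));		/* prints 4: the offset is shared */
	return 0;
}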

int dup_mmap(struct task_struct * tsk)
{
struct vm_area_struct * mpnt, **p, *tmp;

tsk->mmap = NULL;
tsk->stk_vma = NULL;
p = &tsk->mmap;
for (mpnt = current->mmap ; mpnt ; mpnt = mpnt->vm_next) {
tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
*tmp = *mpnt;
tmp->vm_task = tsk;
tmp->vm_next = NULL;
if (tmp->vm_inode)
tmp->vm_inode->i_count++;
*p = tmp;
p = &tmp->vm_next;
if (current->stk_vma == mpnt)
tsk->stk_vma = tmp;
}
return 0;
}
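
The loop in dup_mmap uses a pointer-to-pointer (p = &tmp->vm_next) to append each copied vm_area_struct in its original order without keeping a separate tail variable. The standalone sketch below replays that idiom with a made-up struct node so it can be compiled and run on its own; the error handling is simplified compared with the -ENOMEM return above.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int value;
	struct node *next;
};

static struct node *copy_list(const struct node *src)
{
	struct node *head = NULL;
	struct node **p = &head;		/* where the next copy gets linked in */
	const struct node *cur;

	for (cur = src; cur; cur = cur->next) {
		struct node *copy = malloc(sizeof(*copy));
		if (!copy)
			return head;		/* give up, keep what was copied so far */
		*copy = *cur;			/* copy the whole node, like "*tmp = *mpnt" */
		copy->next = NULL;
		*p = copy;			/* hook it onto the growing list */
		p = &copy->next;		/* the next copy goes after this one */
	}
	return head;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	for (struct node *n = copy_list(&a); n; n = n->next)
		printf("%d\n", n->value);	/* prints 1 2 3, same order as the original */
	return 0;
}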

#define IS_CLONE (regs.orig_eax == __NR_clone)
#define copy_vm(p) ((clone_flags & COPYVM)?copy_page_tables(p):clone_page_tables(p))

asmlinkage int sys_fork(struct pt_regs regs)
{
struct pt_regs * childregs;
struct task_struct *p;
int i,nr;
struct file *f;
unsigned long clone_flags = COPYVM | SIGCHLD;

if (!(p = (struct task_struct *) __get_free_page(GFP_KERNEL))) // allocate one page to hold the new task_struct
goto bad_fork;
nr = find_empty_process(); //任务槽号
if (nr < 0)
goto bad_fork_free;
task[nr] = p;
// copy the whole task structure, then patch the fields that must differ below
*p = *current;
p->did_exec = 0;
p->kernel_stack_page = 0;
p->state = TASK_UNINTERRUPTIBLE; // keep it asleep until setup is complete; made runnable at the end
p->flags &= ~(PF_PTRACED|PF_TRACESYS);
p->pid = last_pid;
p->swappable = 1; // the new task's pages may be swapped out
p->p_pptr = p->p_opptr = current; // the caller of fork becomes the parent
p->p_cptr = NULL;
SET_LINKS(p); // link the new task into the doubly linked list of tasks
p->signal = 0;
p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
p->leader = 0; /* process leadership doesn't inherit */
p->utime = p->stime = 0;
p->cutime = p->cstime = 0;
p->min_flt = p->maj_flt = 0;
p->cmin_flt = p->cmaj_flt = 0;
p->start_time = jiffies; // process start time
/*
* set up new TSS and kernel stack
*/
if (!(p->kernel_stack_page = __get_free_page(GFP_KERNEL)))
goto bad_fork_cleanup;
p->tss.es = KERNEL_DS;
p->tss.cs = KERNEL_CS;
p->tss.ss = KERNEL_DS;
p->tss.ds = KERNEL_DS;
p->tss.fs = USER_DS; // fs alone points at user space (used by get_fs_byte() and friends)
p->tss.gs = KERNEL_DS;
p->tss.ss0 = KERNEL_DS;
p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE; // the kernel stack pointer starts at the end of the page
p->tss.tr = _TSS(nr);
childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1;
p->tss.esp = (unsigned long) childregs;
p->tss.eip = (unsigned long) ret_from_sys_call; // when first scheduled, the child starts at ret_from_sys_call
// The child's registers come in through the regs argument and are copied here.
// Note that childregs was computed as one struct pt_regs below the top of the
// kernel-stack page, so the copy fills exactly the topmost part of the child's
// kernel stack and never runs past the page.
*childregs = regs;
childregs->eax = 0;
p->tss.back_link = 0;
p->tss.eflags = regs.eflags & 0xffffcfff; /* iopl is always 0 for a new process */
if (IS_CLONE) {
// For clone the parameters arrive in registers: ebx carries the new stack
// pointer and ecx carries the clone flags. This is the clone path because
// sys_clone is simply an alias: #define sys_clone sys_fork.
if (regs.ebx)
childregs->esp = regs.ebx;
clone_flags = regs.ecx;
if (childregs->esp == regs.esp)
clone_flags |= COPYVM; // same stack pointer as the parent: force a full VM copy, since sharing would corrupt both stacks
}
p->exit_signal = clone_flags & CSIGNAL; // signal delivered to the parent when this process exits
p->tss.ldt = _LDT(nr);
if (p->ldt) {
p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); // memory management :-(
if (p->ldt != NULL) // copy the local descriptor table
memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
}
p->tss.bitmap = offsetof(struct tss_struct,io_bitmap);
// the tss declares: unsigned long io_bitmap[IO_BITMAP_SIZE+1];
for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */
p->tss.io_bitmap[i] = ~0; // every bit set: I/O port permissions are NOT inherited
// the coprocessor (FPU) state, on the other hand, is inherited
if (last_task_used_math == current)
__asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));
// signals and inter-process communication :-(
p->semun = NULL;
p->shm = NULL;
if (copy_vm(p) || shm_fork(current, p))
goto bad_fork_cleanup;
// COPYFD: "set if fd's should be copied, not shared (NI)"
if (clone_flags & COPYFD) {
for (i=0; i<NR_OPEN;i++)
if ((f = p->filp[i]) != NULL)
p->filp[i] = copy_fd(f); // copy the file structure instead of sharing it
} else {
for (i=0; i<NR_OPEN;i++)
if ((f = p->filp[i]) != NULL)
f->f_count++;
}
// pwd, root and executable are inodes (file-system territory);
// here they are simply shared, with their reference counts bumped
if (current->pwd)
current->pwd->i_count++;
if (current->root)
current->root->i_count++;
if (current->executable)
current->executable->i_count++;
dup_mmap(p);
// each task occupies two GDT entries (a TSS and an LDT descriptor), hence nr<<1 plus the fixed offsets
set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss));
if (p->ldt)
set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512);
else
set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1);

p->counter = current->counter >> 1; // give the child half of the parent's remaining time slice
p->state = TASK_RUNNING; /* do this last, just in case */
return p->pid;
bad_fork_cleanup:
task[nr] = NULL;
REMOVE_LINKS(p);
free_page(p->kernel_stack_page);
bad_fork_free:
free_page((long) p);
bad_fork:
return -EAGAIN;
}
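
Seen from user space, the two lines childregs->eax = 0 and return p->pid are exactly why fork() appears to return twice: 0 in the child (the forced eax in its copied registers) and the new PID in the parent (the system call's ordinary return value). A small illustrative program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* child: sees the eax that was forced to 0 */
		printf("child: fork() returned 0, my pid is %d\n", getpid());
		_exit(0);
	}
	/* parent: gets "return p->pid" back from the system call */
	printf("parent: fork() returned %d\n", pid);
	wait(NULL);
	return 0;
}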
Document (PDF): http://blogimg.chinaunix.net/blog/upfile2/090308112839.pdf