文章详情

  • 游戏榜单
  • 软件榜单
关闭导航
热搜榜
热门下载
热门标签
php爱好者> php文档>linux 1.0 内核注解 linux/fs/exec.c

linux 1.0 内核注解 linux/fs/exec.c

时间:2009-05-24  来源:taozhijiangscu

/********************************************
 *Created By: Prometheus
 *Date        : 2009-5-24   
 ********************************************/
/*
 *  linux/fs/exec.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
/*
 * #!-checking implemented by tytso.
 */
/*
 * Demand-loading implemented 01.12.91 - no need to read anything but
 * the header into memory. The inode of the executable is put into
 * "current->executable", and page faults do the actual loading. Clean.
 *
 * Once more I can proudly say that linux stood up to being changed: it
 * was less than 2 hours work to get demand-loading completely implemented.
 *
 * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
 * current->executable is only used by the procfs.  This allows a dispatch
 * table to check for several different types  of binary formats.  We keep
 * trying until we recognize the file or we run out of supported binary
 * formats.
 */
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/segment.h>
#include <linux/malloc.h>
#include <asm/system.h>
#include <linux/binfmts.h>
#include <asm/segment.h>
#include <asm/system.h>
asmlinkage int sys_exit(int exit_code);
asmlinkage int sys_close(unsigned fd);
asmlinkage int sys_open(const char *, int, int);
asmlinkage int sys_brk(unsigned long);
extern void shm_exit (void);

/*
 * open_inode() attaches an already looked-up inode to a new file structure,
 * installs it in the first free descriptor slot of the current task and
 * returns the descriptor number.
 *
 * inode: inode to open; must provide i_op->default_file_ops.
 * mode:  open flags, stored in f_flags; f_mode is derived as
 *        (mode+1) & O_ACCMODE.
 *
 * Returns the new descriptor (>= 0), or a negative errno:
 *   -EINVAL  the inode has no default file operations,
 *   -EMFILE  get_empty_filp() could not supply a file structure,
 *   -ENFILE  every descriptor slot of the task is in use,
 *   or whatever the file's open() operation reports.
 * On success the inode's i_count is incremented.
 *
 * NOTE(review): -EMFILE/-ENFILE look swapped relative to their usual
 * meaning; the codes are kept as they were so callers see no change.
 */
int open_inode(struct inode * inode, int mode)
{
	int error, fd;
	struct file *f;

	if (!inode->i_op || !inode->i_op->default_file_ops)
		return -EINVAL;
	f = get_empty_filp();
	if (!f)
		return -EMFILE;
	/*
	 * Look for the first unused slot.  Descriptors run 0..NR_OPEN-1 (the
	 * close-on-exec loop in flush_old_exec() walks the same range), so
	 * the scan must stop before index NR_OPEN.
	 */
	for (fd = 0; fd < NR_OPEN; fd++)
		if (!current->filp[fd])
			break;
	if (fd >= NR_OPEN) {
		/* Drop the file structure again, as the open-failure path does. */
		f->f_count--;
		return -ENFILE;
	}
	current->filp[fd] = f;
	f->f_flags = mode;
	f->f_mode = (mode+1) & O_ACCMODE;
	f->f_inode = inode;
	f->f_pos = 0;
	f->f_reada = 0;
	f->f_op = inode->i_op->default_file_ops;
	if (f->f_op->open) {
		error = f->f_op->open(inode,f);
		if (error) {
			/* Undo the slot assignment and release the file structure. */
			current->filp[fd] = NULL;
			f->f_count--;
			return error;
		}
	}
	inode->i_count++;
	return fd;
}
/*
 * Helpers for writing a core file; core_dump() should touch the file only
 * through these two macros.
 *
 * Both expect the following names at the point of use: a `struct file file`
 * whose f_op is set, the matching `inode`, and a `close_coredump` label that
 * is jumped to as soon as a write or seek does not do exactly what was asked.
 */
#define DUMP_WRITE(addr,nr) \
do { \
	if (file.f_op->write(inode,&file,(char *)(addr),(nr)) != (nr)) \
		goto close_coredump; \
} while (0)
/* Seek via the lseek operation when there is one, else set f_pos directly. */
#define DUMP_SEEK(offset) \
do { \
	if (file.f_op->lseek) { \
		if (file.f_op->lseek(inode,&file,(offset),0) != (offset)) \
			goto close_coredump; \
	} else \
		file.f_pos = (offset); \
} while (0)
/*
 * Routine writes a core dump image in the current directory.
 * Currently only a stub-function.
 *
 * Note that setuid/setgid files won't make a core-dump if the uid/gid
 * changed due to the set[u|g]id. It's enforced by the "current->dumpable"
 * field, which also makes sure the core-dumps won't be recursive if the
 * dumping of the process results in another error..
 *
 * signr: signal number recorded in the dump header (dump.signal).
 * regs:  register state copied into the dump header (dump.regs).
 *
 * Returns has_dumped: 1 once the core file has been opened and found to
 * have a write operation (even if a later write fails), 0 otherwise.
 *
 * File layout as written below: struct user at offset 0, then a seek to
 * PAGE_SIZE, the data area (if it fits the limit), the stack area (if it
 * fits the limit), and finally the task structure.
 */
int core_dump(long signr, struct pt_regs * regs)
{
 struct inode * inode = NULL;
 struct file file;
 unsigned short fs;
 int has_dumped = 0;
 char corefile[6+sizeof(current->comm)];
 int i;
 register int dump_start, dump_size;
 struct user dump;
 if (!current->dumpable)
  return 0;
 /* Clear the flag first so a fault while dumping cannot recurse. */
 current->dumpable = 0;
/* See if we have enough room to write the upage.  */
 if (current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE)
  return 0;
 /* The file name lives on the kernel stack, so name lookup needs fs = KERNEL_DS. */
 fs = get_fs();
 set_fs(KERNEL_DS);
 memcpy(corefile,"core.",5);
#if 0
 memcpy(corefile+5,current->comm,sizeof(current->comm));
#else
 /* Overwrite the '.' so the file is simply named "core". */
 corefile[4] = '\0';
#endif
    /* The literal 2 is presumably O_RDWR (read/write) - confirm against fcntl.h. */
 if (open_namei(corefile,O_CREAT | 2 | O_TRUNC,0600,&inode,NULL)) {
  inode = NULL;
  goto end_coredump;
 }
 if (!S_ISREG(inode->i_mode))
  goto end_coredump;
 if (!inode->i_op || !inode->i_op->default_file_ops)
  goto end_coredump;
 /* Build a private, on-stack file structure for the core file. */
 file.f_mode = 3;
 file.f_flags = 0;
 file.f_count = 1;
 file.f_inode = inode;
 file.f_pos = 0;
 file.f_reada = 0;
 file.f_op = inode->i_op->default_file_ops;
 if (file.f_op->open)
  if (file.f_op->open(inode,&file))
   goto end_coredump;
 if (!file.f_op->write)
  goto close_coredump;
 has_dumped = 1;
/* changed the size calculations - should hopefully work better. lbt */
 dump.magic = CMAGIC; /* code identifying a core file */
 dump.start_code = 0;
 dump.start_stack = regs->esp & ~(PAGE_SIZE - 1);
 /* Sizes are in pages (>> 12). */
 dump.u_tsize = ((unsigned long) current->end_code) >> 12;
 dump.u_dsize = ((unsigned long) (current->brk + (PAGE_SIZE-1))) >> 12;
 dump.u_dsize -= dump.u_tsize;  /* the data size excludes the text pages */
 dump.u_ssize = 0;  /* stack segment size (pages) */
 for(i=0; i<8; i++) dump.u_debugreg[i] = current->debugreg[i]; 
 if (dump.start_stack < TASK_SIZE)
  dump.u_ssize = ((unsigned long) (TASK_SIZE - dump.start_stack)) >> 12;
 /*
  * For data and stack, check the process's core-size limit first: an area
  * is recorded only if it fits, otherwise its size is set to 0.  The two
  * checks below drop the data area before the stack, so the stack takes
  * priority.
  */
/* If the size of the dump file exceeds the rlimit, then see what would happen
   if we wrote the stack, but not the data area.  */
 if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
     current->rlim[RLIMIT_CORE].rlim_cur)
  dump.u_dsize = 0;
/* Make sure we have enough room to write the stack and data areas. */
 if ((dump.u_ssize+1) * PAGE_SIZE >
     current->rlim[RLIMIT_CORE].rlim_cur)
  dump.u_ssize = 0;
        strncpy(dump.u_comm, current->comm, sizeof(current->comm)); /* user command name */
 /* u_ar0 holds the offset of regs inside the dump header, not a real pointer. */
 dump.u_ar0 = (struct pt_regs *)(((int)(&dump.regs)) -((int)(&dump)));
 dump.signal = signr;
 dump.regs = *regs;
/* Flag indicating the math stuff is valid. We don't support this for the
   soft-float routines yet */
 if (hard_math) {
  if ((dump.u_fpvalid = current->used_math) != 0) { /* the task has used floating point */
   if (last_task_used_math == current)  /* state is still in the FPU: save it directly */
    __asm__("clts ; fnsave %0": :"m" (dump.i387));
   else
    memcpy(&dump.i387,&current->tss.i387.hard,sizeof(dump.i387));
  }
 } else {
  /* we should dump the emulator state here, but we need to
     convert it into standard 387 format first.. */
  dump.u_fpvalid = 0;
 }
 /* The header is a kernel-stack object, so write it with fs = KERNEL_DS. */
 set_fs(KERNEL_DS);
/* struct user */
 DUMP_WRITE(&dump,sizeof(dump));
/* Now dump all of the user data.  Include malloced stuff as well */
 DUMP_SEEK(PAGE_SIZE);
/* now we start writing out the user space info */
 set_fs(USER_DS);
/* Dump the data area */
 if (dump.u_dsize != 0) {
  dump_start = dump.u_tsize << 12;
  dump_size = dump.u_dsize << 12;
  DUMP_WRITE(dump_start,dump_size);
 };
/* Now prepare to dump the stack area */
 if (dump.u_ssize != 0) {
  dump_start = dump.start_stack;
  dump_size = dump.u_ssize << 12;
  DUMP_WRITE(dump_start,dump_size);
 };
/* Finally dump the task struct.  Not be used by gdb, but could be useful */
 set_fs(KERNEL_DS);
 DUMP_WRITE(current,sizeof(*current));
close_coredump:
 if (file.f_op->release)
  file.f_op->release(inode,&file);
end_coredump:
 /* Restore the caller's segment and drop the inode reference. */
 /* NOTE(review): inode may still be NULL here; presumably iput() accepts that - confirm. */
 set_fs(fs);
 iput(inode);
 return has_dumped;
}
/*
 * sys_uselib() loads a shared library given its path name.
 *
 * The library is opened with sys_open(library, 0, 0) and then offered to
 * the load_shlib handler of each entry in formats[], in table order, for
 * as long as the handlers answer -ENOEXEC.  The table ends at the first
 * entry without a load_shlib handler.
 *
 * Note that a shared library must be both readable and executable due to
 * security reasons, and that the address to load at is taken from the
 * file itself.
 *
 * Returns the result of the last handler tried, -ENOEXEC when no handler
 * ran, or the negative value from sys_open() when the open failed.  The
 * descriptor is always closed again before returning.
 */
asmlinkage int sys_uselib(const char * library)
{
	struct linux_binfmt *handler;
	struct file *libfile;
	int result = -ENOEXEC;
	int fd;

	fd = sys_open(library, 0, 0);
	if (fd < 0)
		return fd;

	libfile = current->filp[fd];
	if (libfile && libfile->f_inode && libfile->f_op && libfile->f_op->read) {
		/* Walk the format table until one loader recognises the file. */
		for (handler = formats; handler->load_shlib; handler++) {
			result = handler->load_shlib(fd);
			if (result != -ENOEXEC)
				break;
		}
	}

	sys_close(fd);
	return result;
}
/*
 * create_tables() parses the env- and arg-strings in new user
 * memory and creates the pointer tables from them, and puts their
 * addresses on the "stack", returning the new stack pointer value.
 *
 * p:    user address of the first argument string; the strings are laid
 *       out back to back, arguments first, then the environment.
 * argc: number of argument strings.
 * envc: number of environment strings.
 * ibcs: when non-zero, the argv/envp table addresses are not pushed.
 *
 * Resulting stack, from the returned pointer upwards:
 *   argc, [argv table address, envp table address - only if !ibcs],
 *   argv[0..argc-1], 0, envp[0..envc-1], 0
 *
 * Also records arg_start/arg_end/env_start/env_end in the current task
 * and, if the allocation succeeds, registers a vm area for the stack.
 */
unsigned long * create_tables(char * p,int argc,int envc,int ibcs)
{
	struct vm_area_struct *stack_vma;
	unsigned long *arg_slot, *env_slot;
	unsigned long *top;

	/* Describe the stack region from p's page up to TASK_SIZE. */
	stack_vma = (struct vm_area_struct *)kmalloc(sizeof(*stack_vma), GFP_KERNEL);
	if (stack_vma) {
		stack_vma->vm_task = current;
		stack_vma->vm_start = PAGE_MASK & (unsigned long) p;
		stack_vma->vm_end = TASK_SIZE;
		stack_vma->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY;
		stack_vma->vm_share = NULL;
		stack_vma->vm_inode = NULL;
		stack_vma->vm_offset = 0;
		stack_vma->vm_ops = NULL;
		insert_vm_struct(current, stack_vma);
		current->stk_vma = stack_vma;
	}

	/* Align down to 4 bytes, then reserve both tables (+1 for each NULL). */
	top = (unsigned long *) (0xfffffffc & (unsigned long) p);
	env_slot = top - (envc+1);
	arg_slot = env_slot - (argc+1);
	top = arg_slot;

	if (!ibcs) {
		put_fs_long((unsigned long)env_slot,--top);
		put_fs_long((unsigned long)arg_slot,--top);
	}
	put_fs_long((unsigned long)argc,--top);

	/*
	 * The tables only hold pointers: walk the string area and store the
	 * start of each string in its slot.
	 */
	current->arg_start = (unsigned long) p;
	for ( ; argc > 0; argc--) {
		put_fs_long((unsigned long) p,arg_slot++);
		while (get_fs_byte(p++)) /* skip to the end of this string */ ;
	}
	put_fs_long(0,arg_slot);

	current->arg_end = current->env_start = (unsigned long) p;
	for ( ; envc > 0; envc--) {
		put_fs_long((unsigned long) p,env_slot++);
		while (get_fs_byte(p++)) /* skip to the end of this string */ ;
	}
	put_fs_long(0,env_slot);
	current->env_end = (unsigned long) p;

	return top;	/* new top of stack */
}
/*
 * count() returns the number of entries in a zero-terminated pointer
 * table (argument or environment vector) read through the fs segment.
 * A null table counts as empty.  Each entry is read as one long.
 */
static int count(char ** argv)
{
	int n;

	if (!argv)
		return 0;
	for (n = 0; get_fs_long((unsigned long *) (argv + n)); n++)
		/* keep counting until the terminating zero entry */ ;
	return n;
}
/*
 * 'copy_string()' copies argument/envelope strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 *
 * Modified by TYT, 11/24/91 to add the from_kmem argument, which specifies
 * whether the string and the string array are from user or kernel segments:
 *
 * from_kmem     argv *        argv **
 *    0          user space    user space
 *    1          kernel space  user space
 *    2          kernel space  kernel space
 *
 * We do this by playing games with the fs segment register.  Since it
 * it is expensive to load a segment register, we try to avoid calling
 * set_fs() unless we absolutely have to.
 */
//将用户态的数据拷贝到内核态
unsigned long copy_strings(int argc,char ** argv,unsigned long *page,
  unsigned long p, int from_kmem)
{
 char *tmp, *pag = NULL;
 int len, offset = 0;
 unsigned long old_fs, new_fs;
 if (!p)
  return 0; /* bullet-proofing */
 new_fs = get_ds();
 old_fs = get_fs();
 if (from_kmem==2)  //内核空间到内核空间
  set_fs(new_fs);  //fs->内核段
 while (argc-- > 0) {
  if (from_kmem == 1) //如果argv在内核空间,就将fs设置成内核空间
   set_fs(new_fs);
  if (!(tmp = (char *)get_fs_long(((unsigned long *)argv)+argc))) //获得字符串的地址(地址本身根据from_kmem可能在
          //内核空间或者用户空间的)
   panic("VFS: argc is wrong");
  if (from_kmem == 1) //如果必要要立即恢复,这里很巧妙,如果from_kmem==2,那么就是内核空间数据的交换。fs就一直是内核空间
     //了,这里的改变复原是不会发生的。同样的如果from_kmem==0,那么fs默认的就是用户空间,也不用改变了
   set_fs(old_fs);
  len=0;  /* remember zero-padding */
  do {
   len++;
  } while (get_fs_byte(tmp++));
   //bprm.p = PAGE_SIZE*MAX_ARG_PAGES-4;
  if (p < len) { /* this shouldn't happen - 128kB */
   set_fs(old_fs);
   return 0;
  }
  while (len) {  //下面的argv**实际的字符数据只有当from_kmem==2时候在内核空间
     //而其他时候就在用户空间了,当from_kmem!=2时候fs应该此时是用户空间的
   --p; --tmp; --len;
   if (--offset < 0) { //表示页面内部的偏移,当offset<0的时候就需要申请新的页面了
    offset = p % PAGE_SIZE;
    if (from_kmem==2)
     set_fs(old_fs);
    if (!(pag = (char *) page[p/PAGE_SIZE]) &&
        !(pag = (char *) page[p/PAGE_SIZE] =
          (unsigned long *) get_free_page(GFP_USER))) //申请失败,看看这里是用户空间的地址
     return 0;
    if (from_kmem==2)
     set_fs(new_fs);
   }
   *(pag + offset) = get_fs_byte(tmp); //复制一字节的数据
  }
 }
 if (from_kmem==2)
  set_fs(old_fs);
 return p;
}
/*
 * change_ldt() places the argument pages at the top of the task's address
 * space: page[MAX_ARG_PAGES-1] goes directly below TASK_SIZE,
 * page[MAX_ARG_PAGES-2] one page lower, and so on.  Empty slots are
 * skipped but still consume their page of address space.  Each page
 * installed bumps current->rss.
 *
 * text_size is not used.  current->start_code is reset to 0.
 * Returns TASK_SIZE.
 */
unsigned long change_ldt(unsigned long text_size,unsigned long * page)
{
	unsigned long addr = TASK_SIZE;
	int slot = MAX_ARG_PAGES;

	current->start_code = 0;
	while (--slot >= 0) {
		addr -= PAGE_SIZE;	/* start address of this slot's page */
		if (!page[slot])
			continue;
		current->rss++;
		put_dirty_page(current,page[slot],addr);
	}
	return TASK_SIZE;
}
/*
 * Read in the complete executable. This is used for "-N" files
 * that aren't on a block boundary, and for files on filesystems
 * without bmap support.
 *
 * inode:  file to read from; a temporary on-stack file structure is used.
 * offset: file position to start at.
 * addr:   destination; checked with verify_area() only when fs is USER_DS.
 * count:  number of bytes requested.
 *
 * Returns whatever the file's read operation returns, the verify_area()
 * error, or -ENOEXEC when the inode cannot be opened, read or positioned.
 */
int read_exec(struct inode *inode, unsigned long offset,
	char * addr, unsigned long count)
{
	struct file file;
	int result = -ENOEXEC;

	if (!inode->i_op || !inode->i_op->default_file_ops)
		return result;

	file.f_op = inode->i_op->default_file_ops;
	file.f_inode = inode;
	file.f_mode = 1;
	file.f_flags = 0;
	file.f_count = 1;
	file.f_pos = 0;
	file.f_reada = 0;

	if (file.f_op->open && file.f_op->open(inode,&file))
		return result;
	if (!file.f_op || !file.f_op->read)
		goto out_release;

	/* Position at offset: through lseek if provided, else set f_pos directly. */
	if (!file.f_op->lseek)
		file.f_pos = offset;
	else if (file.f_op->lseek(inode,&file,offset,0) != offset)
		goto out_release;

	if (get_fs() == USER_DS) {
		result = verify_area(VERIFY_WRITE, addr, count);
		if (result)
			goto out_release;
	}
	result = file.f_op->read(inode, &file, addr, count);

out_release:
	if (file.f_op->release)
		file.f_op->release(inode,&file);
	return result;
}

/*
 * This function flushes out all traces of the currently running executable so
 * that a new one can be started
 */
//基本将原进程的很多的资源给释放掉
void flush_old_exec(struct linux_binprm * bprm)
{
 int i;
 int ch;
 char * name;
 struct vm_area_struct * mpnt, *mpnt1;
 current->dumpable = 1;
 name = bprm->filename;
 for (i=0; (ch = *(name++)) != '\0';) {
  if (ch == '/')
   i = 0;
  else
   if (i < 15) //命令只能15个字符?
    current->comm[i++] = ch;
 }
 current->comm[i] = '\0';
 if (current->shm)
  shm_exit();
 if (current->executable) {
  iput(current->executable);
  current->executable = NULL; //取消当前的执行镜像
 }
 /* Release all of the old mmap stuff. */
 mpnt = current->mmap;
 current->mmap = NULL;
 current->stk_vma = NULL;
 while (mpnt) {
  mpnt1 = mpnt->vm_next;
  if (mpnt->vm_ops && mpnt->vm_ops->close)
   mpnt->vm_ops->close(mpnt);
  kfree(mpnt);
  mpnt = mpnt1;
 }
 /* Flush the old ldt stuff... */
 if (current->ldt) {
  free_page((unsigned long) current->ldt);
  current->ldt = NULL;
  for (i=1 ; i<NR_TASKS ; i++) {
   if (task[i] == current)  {
    set_ldt_desc(gdt+(i<<1)+
          FIRST_LDT_ENTRY,&default_ldt, 1); //替换成默认的局部描述符表
    load_ldt(i);
   }
  } 
 }
 for (i=0 ; i<8 ; i++) current->debugreg[i] = 0;  if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
     !permission(bprm->inode,MAY_READ))
  current->dumpable = 0;
 current->signal = 0;
 for (i=0 ; i<32 ; i++) {
  current->sigaction[i].sa_mask = 0;
  current->sigaction[i].sa_flags = 0;
  if (current->sigaction[i].sa_handler != SIG_IGN)
   current->sigaction[i].sa_handler = NULL;
 }
 for (i=0 ; i<NR_OPEN ; i++)
  if (FD_ISSET(i,&current->close_on_exec))
   sys_close(i);  //so close on exec
 FD_ZERO(&current->close_on_exec); //默认的是复位的
 clear_page_tables(current);
 if (last_task_used_math == current)
  last_task_used_math = NULL;
 current->used_math = 0;
 current->elf_executable = 0;
}
/*
 * do_execve() does the real work for sys_execve(): it opens the file,
 * checks that the caller may execute it, handles "#!" interpreter scripts,
 * copies the argument and environment strings into bprm.page[] and then
 * offers the image to each load_binary handler in formats[].
 *
 * filename: path of the program; used with open_namei() and, for scripts,
 *           copied as a kernel-space string (copy_strings mode 2).
 * argv, envp: argument/environment tables, read via count() and
 *           copy_strings() mode 0.
 * regs:     register frame of the caller; regs->cs must be USER_CS.
 *
 * Returns 0 once a handler has loaded the image; otherwise returns retval
 * (a negative errno on the error paths below) after freeing the argument
 * pages.
 *
 * Error labels: exec_error2 releases bprm.inode and falls into exec_error1,
 * which only frees the argument pages.  The script path uses exec_error1
 * because it has already released the script's inode.
 */
static int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
 struct linux_binprm bprm;
 struct linux_binfmt * fmt;
 unsigned long old_fs;
 int i;
 int retval;
 int sh_bang = 0;
 if (regs->cs != USER_CS) /* only callable from the user code segment */
  return -EINVAL;
 /* Strings are stored downwards from the top of the argument area. */
 bprm.p = PAGE_SIZE*MAX_ARG_PAGES-4;
 for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
  bprm.page[i] = 0;
 retval = open_namei(filename, 0, 0, &bprm.inode, NULL); /* look up the executable */
 if (retval)
  return retval;
 bprm.filename = filename;
 bprm.argc = count(argv);
 bprm.envc = count(envp);
 
restart_interp:
 if (!S_ISREG(bprm.inode->i_mode)) { /* must be regular file */
  retval = -EACCES;
  goto exec_error2;
 }
 if (IS_NOEXEC(bprm.inode)) {  /* FS mustn't be mounted noexec */
  retval = -EPERM;
  goto exec_error2;
 }
 if (!bprm.inode->i_sb) {
  retval = -EACCES;
  goto exec_error2;
 }
 i = bprm.inode->i_mode;
 /*
  * When IS_NOSUID() holds for the inode, refuse set-uid files owned by
  * someone other than the caller's euid and set-gid files of a group the
  * caller is not in, unless the caller is the superuser.
  */
 if (IS_NOSUID(bprm.inode) && (
  ((i & S_ISUID) && bprm.inode->i_uid != current->euid)
  || ((i & S_ISGID) && !in_group_p(bprm.inode->i_gid))
        )
     &&!suser())
 {
  retval = -EPERM;
  goto exec_error2;
 }
 /* make sure we don't let suid, sgid files be ptraced. */
 if (current->flags & PF_PTRACED) {
  bprm.e_uid = current->euid;
  bprm.e_gid = current->egid;
 } else {
  /* Honour the set-uid/set-gid bits: take the file owner's ids. */
  bprm.e_uid = (i & S_ISUID) ? bprm.inode->i_uid : current->euid;
  bprm.e_gid = (i & S_ISGID) ? bprm.inode->i_gid : current->egid;
 }
 
 /* Shift the owner, group or other permission triplet into the low bits. */
 if (current->euid == bprm.inode->i_uid)
  i >>= 6;
 else if (in_group_p(bprm.inode->i_gid))
  i >>= 3;
 /* Need the execute bit, or be superuser with any execute bit set. */
 if (!(i & 1) &&
     !((bprm.inode->i_mode & 0111) && suser())
 ) {
  retval = -EACCES;
  goto exec_error2;
 }
 /* Read the first 128 bytes of the file into the kernel buffer. */
 memset(bprm.buf,0,sizeof(bprm.buf));
 old_fs = get_fs();
 set_fs(get_ds());
 retval = read_exec(bprm.inode,0,bprm.buf,128);
 set_fs(old_fs);
 if (retval < 0)
  goto exec_error2;
 /*
  * sh_bang is a flag: it is incremented once an interpreter has been set
  * up, so this section runs at most once per exec.
  */
 if ((bprm.buf[0] == '#') && (bprm.buf[1] == '!') && (!sh_bang)) {
  /*
   * This section does the #! interpretation.
   * Sorta complicated, but hopefully it will work.  -TYT
   */
  /* The script's inode is released here; later failures use exec_error1. */
  char *cp, *interp, *i_name, *i_arg;   iput(bprm.inode);
  bprm.buf[127] = '\0';
  /* Cut the line at the first newline, or at the end of the buffer. */
  if ((cp = strchr(bprm.buf, '\n')) == NULL)
   cp = bprm.buf+127;
  *cp = '\0';
  while (cp > bprm.buf) {
   cp--;
   if ((*cp == ' ') || (*cp == '\t'))
    *cp = '\0'; /* strip trailing blanks */
   else
    break;
  }
  /* Skip "#!" and any blanks that follow it. */
  for (cp = bprm.buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
  if (!cp || *cp == '\0') {
   retval = -ENOEXEC; /* No interpreter name found */
   goto exec_error1;
  }
  interp = i_name = cp;
  i_arg = 0;
  for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
    if (*cp == '/')
    i_name = cp+1; /* i_name ends up after the last '/' of the path */
  }
  while ((*cp == ' ') || (*cp == '\t'))
   *cp++ = '\0';
  if (*cp) /* whatever follows the interpreter name is its argument */
   i_arg = cp;
  /*
   * OK, we've parsed out the interpreter name and
   * (optional) argument.
   */
  if (sh_bang++ == 0) {
   /*
    * Copy the environment and the arguments after argv[0];
    * bprm.p is passed in and updated by each call.
    */
   bprm.p = copy_strings(bprm.envc, envp, bprm.page, bprm.p, 0);
   bprm.p = copy_strings(--bprm.argc, argv+1, bprm.page, bprm.p, 0);
  }
  /*
   * Splice in (1) the interpreter's name for argv[0]
   *           (2) (optional) argument to interpreter
   *           (3) filename of shell script
   *
   * This is done in reverse order, because of how the
   * user environment and arguments are stored.
   */
  bprm.p = copy_strings(1, &bprm.filename, bprm.page, bprm.p, 2); /* script file name, kernel to kernel */
  bprm.argc++;
  if (i_arg) {
   bprm.p = copy_strings(1, &i_arg, bprm.page, bprm.p, 2); /* interpreter argument */
   bprm.argc++;
  }
  bprm.p = copy_strings(1, &i_name, bprm.page, bprm.p, 2); /* interpreter name */
  bprm.argc++;
  if (!bprm.p) {
   retval = -E2BIG;
   goto exec_error1;
  }
  /*
   * OK, now restart the process with the interpreter's inode.
   * Note that we use open_namei() as the name is now in kernel
   * space, and we don't need to copy it.
   */
  retval = open_namei(interp, 0, 0, &bprm.inode, NULL);
  if (retval)
   goto exec_error1;
  /*
   * bprm.inode now refers to the interpreter, so the permission
   * checks and the header read are repeated for it.
   */
  goto restart_interp;
 }
 /* Not a script: arguments and environment have not been copied yet. */
 if (!sh_bang) {
  bprm.p = copy_strings(bprm.envc,envp,bprm.page,bprm.p,0); /* copy_strings mode 0 */
  bprm.p = copy_strings(bprm.argc,argv,bprm.page,bprm.p,0);
  if (!bprm.p) {
   retval = -E2BIG;
   goto exec_error2;
  }
 }
 bprm.sh_bang = sh_bang;
 /* Try each binary format until one stops answering -ENOEXEC. */
 fmt = formats;
 do {
  int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
  if (!fn)
   break;
  retval = fn(&bprm, regs);
  if (retval == 0) {
   /* The image is loaded; the handler has set up regs for it. */
   iput(bprm.inode);
   current->did_exec = 1;
   return 0;
  }
  fmt++;
 } while (retval == -ENOEXEC);
exec_error2:
 iput(bprm.inode);
exec_error1:
 for (i=0 ; i<MAX_ARG_PAGES ; i++)
  free_page(bprm.page[i]);
 return(retval);
}
/*
 * sys_execve() executes a new program.
 *
 * The path name pointer is taken from regs.ebx and fetched with getname();
 * the argv and envp tables come from regs.ecx and regs.edx.  The name
 * obtained from getname() is handed back with putname() whatever
 * do_execve() returns.
 *
 * Returns the getname() error, or the result of do_execve().
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
	char *kname;
	int status;

	status = getname((char *) regs.ebx, &kname);
	if (!status) {
		status = do_execve(kname, (char **) regs.ecx, (char **) regs.edx, &regs);
		putname(kname);
	}
	return status;
}
/*
 * These are  the prototypes for the  functions in the  dispatch table, as
 * well as the  dispatch  table itself.
 *
 * Each format supplies two entry points: one that loads an executable
 * (called through load_binary in do_execve()) and one that loads a shared
 * library from an open descriptor (called through load_shlib in
 * sys_uselib()).
 */
extern int load_aout_binary(struct linux_binprm *,
       struct pt_regs * regs);
extern int load_aout_library(int fd);
#ifdef CONFIG_BINFMT_ELF
extern int load_elf_binary(struct linux_binprm *,
       struct pt_regs * regs);
extern int load_elf_library(int fd);
#endif
#ifdef CONFIG_BINFMT_COFF
extern int load_coff_binary(struct linux_binprm *,
       struct pt_regs * regs);
extern int load_coff_library(int fd);
#endif
/* Here are the actual binaries that will be accepted  */
/*
 * Entries are tried in this order.  The all-NULL entry terminates the
 * table: both walkers stop at the first entry whose handler is NULL.
 */
struct linux_binfmt formats[] = {
 {load_aout_binary, load_aout_library},
#ifdef CONFIG_BINFMT_ELF
 {load_elf_binary, load_elf_library},
#endif
#ifdef CONFIG_BINFMT_COFF
 {load_coff_binary, load_coff_library},
#endif
 {NULL, NULL}
};
/*
 * These are the functions used to load a.out style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */
/*
 * Layout of the a.out header, as quoted by the annotator (see a.out.h):
 *
 *   struct exec
 *   {
 *     unsigned long a_info;    - use macros N_MAGIC, etc for access
 *     unsigned a_text;         - length of text, in bytes
 *     unsigned a_data;         - length of data, in bytes
 *     unsigned a_bss;          - length of uninitialized data area, in bytes
 *     unsigned a_syms;         - length of symbol table data in file, in bytes
 *     unsigned a_entry;        - start address
 *     unsigned a_trsize;       - length of relocation info for text, in bytes
 *     unsigned a_drsize;       - length of relocation info for data, in bytes
 *   };
 */
/*
 * load_aout_binary() is the a.out load_binary handler.
 *
 * bprm: prepared exec parameters; the header is taken from bprm->buf, the
 *       file from bprm->inode and the argument pages from bprm->page.
 * regs: user register frame; eip and esp are set to the new entry point
 *       and stack.
 *
 * Returns -ENOEXEC while the file is still being validated (before the old
 * image is flushed).  After the flush it returns 0, also when a mapping
 * failed and SIGSEGV was queued instead, or the negative descriptor from
 * open_inode().
 */
int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
 struct exec ex;
 struct file * file;
 int fd, error;
 unsigned long p = bprm->p;
 ex = *((struct exec *) bprm->buf);  /* exec-header */
 /*
  * Accept only ZMAGIC/OMAGIC/QMAGIC images that carry no relocation
  * info and whose file is at least as large as the header claims.
  */
 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
      N_MAGIC(ex) != QMAGIC) ||
     ex.a_trsize || ex.a_drsize ||
     bprm->inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) { 
  return -ENOEXEC;
 }
 if (N_MAGIC(ex) == ZMAGIC &&
     (N_TXTOFF(ex) < bprm->inode->i_sb->s_blocksize)) { 
  printk("N_TXTOFF < BLOCK_SIZE. Please convert binary.");
  return -ENOEXEC;
 }
 /*
  * For ZMAGIC files the text offset must be exactly BLOCK_SIZE; the
  * library loader below performs a similar text-offset check.
  */
 if (N_TXTOFF(ex) != BLOCK_SIZE && N_MAGIC(ex) == ZMAGIC) {
  printk("N_TXTOFF != BLOCK_SIZE. See a.out.h.");
  return -ENOEXEC;
 }
 
 /* OK, This is the point of no return */
 flush_old_exec(bprm);
 current->end_code = N_TXTADDR(ex) + ex.a_text;
 current->end_data = ex.a_data + current->end_code;
 /* The break starts at the end of the data segment; sys_brk() can move it later. */
 current->start_brk = current->brk = current->end_data;
 current->start_code += N_TXTADDR(ex);
 current->rss = 0;
 current->suid = current->euid = bprm->e_uid;
 current->mmap = NULL;
 current->executable = NULL;  /* for OMAGIC files */
 current->sgid = current->egid = bprm->e_gid;
 if (N_MAGIC(ex) == OMAGIC) {
  /* Anonymous mapping for text+data, then read the image in. */
  /* NOTE(review): neither return value is checked here. */
  do_mmap(NULL, 0, ex.a_text+ex.a_data,
   PROT_READ|PROT_WRITE|PROT_EXEC,
   MAP_FIXED|MAP_PRIVATE, 0);
  /* Offset 32 skips the a.out header; the text follows it directly. */
  read_exec(bprm->inode, 32, (char *) 0, ex.a_text+ex.a_data);
 } else { /* other formats: text and data are mapped by page, so they should be page aligned */
  if (ex.a_text & 0xfff || ex.a_data & 0xfff)
   printk("%s: executable not page aligned\n", current->comm);
  
  fd = open_inode(bprm->inode, O_RDONLY);
  
  /* NOTE(review): the old image is already gone when this fails. */
  if (fd < 0)
   return fd;
  file = current->filp[fd];
  /* No mmap operation for this file: fall back to reading it in. */
  if (!file->f_op || !file->f_op->mmap)
  {
   sys_close(fd);
   do_mmap(NULL, 0, ex.a_text+ex.a_data,
    PROT_READ|PROT_WRITE|PROT_EXEC,
    MAP_FIXED|MAP_PRIVATE, 0);
   read_exec(bprm->inode, N_TXTOFF(ex),
      (char *) N_TXTADDR(ex), ex.a_text+ex.a_data);
   goto beyond_if;
  }
  /* Map the text segment read-only and shared. */
  error = do_mmap(file, N_TXTADDR(ex), ex.a_text,
    PROT_READ | PROT_EXEC,
    MAP_FIXED | MAP_SHARED, N_TXTOFF(ex));
  if (error != N_TXTADDR(ex)) {
   sys_close(fd);
   send_sig(SIGSEGV, current, 0);
   return 0;
  };
  /* Map the data segment writable and private. */
   error = do_mmap(file, N_TXTADDR(ex) + ex.a_text, ex.a_data,
    PROT_READ | PROT_WRITE | PROT_EXEC,
    MAP_FIXED | MAP_PRIVATE, N_TXTOFF(ex) + ex.a_text);
  sys_close(fd);
  if (error != N_TXTADDR(ex) + ex.a_text) {
   send_sig(SIGSEGV, current, 0);
   return 0;
  };
  /* Remember the executable and take a reference on its inode. */
  current->executable = bprm->inode;
  bprm->inode->i_count++;
 }
beyond_if:
 sys_brk(current->brk+ex.a_bss); /* extend the break to cover the bss */
 
 /*
  * Install the argument pages at the top of the address space and turn
  * the offset p into a user address, then build the argv/envp tables.
  */
 p += change_ldt(ex.a_text,bprm->page);
 p -= MAX_ARG_PAGES*PAGE_SIZE;
 p = (unsigned long) create_tables((char *)p,bprm->argc,bprm->envc,0);
 current->start_stack = p;
 regs->eip = ex.a_entry;  /* eip, magic happens :-) */
 regs->esp = p;   /* stack pointer */
 if (current->flags & PF_PTRACED)
  send_sig(SIGTRAP, current, 0);
 return 0;
}
//a.out是由OMAGIC, NMAGIC, QMAGIC, 或者 ZMAGIC发展过来的一种可执行文件格式.
//OMAGIC :在文件头之后就是各个段,数据段和代码段一块存放.
//NMAGIC:和OMAGIC差不多,但是数据段在代码段之后加载之后就立即加载数据段,并且将代码段标示位只读.
//ZMAGIC:只是增加了对页需求的支持.代码段和头部是分开的页面
//QMAGIC:这种格式可以将文件头和代码段的第一页合并起来,这样可以节省空间.
//所以对于ZMAGIC类型,程序执行的入口地址是页面对齐的;而对于QMAGIC类型执行的
//地址是偏移了32字节的头部的
int load_aout_library(int fd)
{
    struct file * file;
 struct exec ex;
 struct  inode * inode;
 unsigned int len;
 unsigned int bss;
 unsigned int start_addr;
 int error;
 
 file = current->filp[fd];
 inode = file->f_inode;
 
 set_fs(KERNEL_DS);
 if (file->f_op->read(inode, file, (char *) &ex, sizeof(ex)) != sizeof(ex)) {
  return -EACCES;
 }
 set_fs(USER_DS);
 
 /* We come in here for the regular a.out style of shared libraries */
 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || ex.a_trsize || 
     ex.a_drsize || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || 
     inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
  return -ENOEXEC;
 }
 if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) &&
     (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) {
  printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); //should be equal
  return -ENOEXEC;
 }
 
 if (N_FLAGS(ex)) return -ENOEXEC;
 /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
    this off to get the starting address for the page */
 start_addr =  ex.a_entry & 0xfffff000;  //页面地址  /* Now use mmap to map the library into memory. */
 error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
   PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
   N_TXTOFF(ex));
 if (error != start_addr)
  return error;
 len = PAGE_ALIGN(ex.a_text + ex.a_data);
 bss = ex.a_text + ex.a_data + ex.a_bss;
 if (bss > len)  //需要增加映射空间
  do_mmap(NULL, start_addr + len, bss-len,
   PROT_READ|PROT_WRITE|PROT_EXEC,
   MAP_PRIVATE|MAP_FIXED, 0);
 return 0;
}
  文档地址:http://blogimg.chinaunix.net/blog/upfile2/090524204040.pdf
相关阅读 更多 +
排行榜 更多 +
Unity Connect

Unity Connect

学习教育 下载
青橙记录本

青橙记录本

商务办公 下载
脑洞惊魂夜

脑洞惊魂夜

休闲益智 下载