文章详情

  • 游戏榜单
  • 软件榜单
关闭导航
热搜榜
热门下载
热门标签
php爱好者> php文档>Linux文件系统之目录的建立

Linux文件系统之目录的建立

时间:2010-05-09  来源:p2pt

一:前言 在用户空间中,建立目录所用的API为mkdir().它在内核中的系统调用入口是sys_mkdir().今天跟踪一下 函数来分析linux文件系统中目录的建立过程. 二:sys_mkdir() Sys_mkdir()对应的代码如下: asmlinkage long sys_mkdir(const char __user * pathname, int mode) {     int error = 0;      char * tmp;        //把用户空间的值copy到内核空间      tmp = getname(pathname);      error = PTR_ERR(tmp);      if (!IS_ERR(tmp)) {          struct dentry *dentry;          struct nameidata nd;            //先查到它的父目录,看父目录是否存在          error = path_lookup(tmp, LOOKUP_PARENT, &nd);          if (error)               goto out;          //寻找子结点的dentry. 如果没有,则新建之          dentry = lookup_create(&nd, 1);          error = PTR_ERR(dentry);          if (!IS_ERR(dentry)) {               if (!IS_POSIXACL(nd.dentry->d_inode))                    mode &= ~current->fs->umask;               //与具体的文件系统相关的部份               error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);               //减少dentry的引用计数               dput(dentry);          }          up(&nd.dentry->d_inode->i_sem);            //释放临时内存          path_release(&nd); out:          putname(tmp);      }        return error; } 这个函数里面有几个重要的子函数. path_lookup()在前一篇文章中已经分析过了.如果不太了解,请参阅相关的部份. 
lookup_create()的代码如下: struct dentry *lookup_create(struct nameidata *nd, int is_dir) {      struct dentry *dentry;        //防止并发操作,获得信号量      down(&nd->dentry->d_inode->i_sem);      dentry = ERR_PTR(-EEXIST);      //如果之前的查找过程失败      if (nd->last_type != LAST_NORM)          goto fail;        //去掉LOOKUP_PARENT标志      nd->flags &= ~LOOKUP_PARENT;      //在缓存中寻找相应的dentry.如果没有。则新建之      dentry = lookup_hash(&nd->last, nd->dentry);      //创建或者查找失败      if (IS_ERR(dentry))          goto fail;      //如果不是建立一个目录而且文件名字不是以0结尾      //出错退出      if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)          goto enoent;      return dentry; enoent:      dput(dentry);      dentry = ERR_PTR(-ENOENT); fail:      return dentry; } lookup_hash() → __lookup_hash():   static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) {      struct dentry * dentry;      struct inode *inode;      int err;        inode = base->d_inode;      //检查是否有相关的权限      err = permission(inode, MAY_EXEC, nd);      dentry = ERR_PTR(err);      if (err)          goto out;        /*       * See if the low-level filesystem might want       * to use its own hash..       
*/       //如果自定义了hash计算      if (base->d_op && base->d_op->d_hash) {          err = base->d_op->d_hash(base, name);          dentry = ERR_PTR(err);          if (err < 0)               goto out;      }        //从缓存中寻找      dentry = cached_lookup(base, name, nd);      if (!dentry) {          //如果缓存中没有相关项。则新建之          struct dentry *new = d_alloc(base, name);          dentry = ERR_PTR(-ENOMEM);          if (!new)               goto out;          //到具体的文件系统中查找          dentry = inode->i_op->lookup(inode, new, nd);          if (!dentry)               dentry = new;          else               dput(new);      } out:      return dentry; } 值得注意的是:经过上述的操作,返回的dentry有可能是原本就存在的.对这种情况是怎么排除的呢?继续看sys_mkdir()的另一个子函数: int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) {      //对异常情况的排除和权限的检查      int error = may_create(dir, dentry, NULL);        if (error)          return error;        //如果父结点不允许mkdir操作      if (!dir->i_op || !dir->i_op->mkdir)          return -EPERM;        mode &= (S_IRWXUGO|S_ISVTX);      error = security_inode_mkdir(dir, dentry, mode);      if (error)          return error;        DQUOT_INIT(dir);      //调用父结点的mkdir操作      error = dir->i_op->mkdir(dir, dentry, mode);           if (!error) {          //如果成功,通告与之关联的进程          inode_dir_notify(dir, DN_CREATE);          security_inode_post_mkdir(dir,dentry, mode);      }      return error; } 在这里看到,最终会调用父目录的i_op.mkdir操作.另外,对于上面说的相应结点已经存在的情况是在may_create()中检测的: static inline int may_create(struct inode *dir, struct dentry *child,                    struct nameidata *nd) {      //如果欲建结点的inode已经存在      //对于一个新建的dentry.其d_inode指向为空.      if (child->d_inode)          return -EEXIST;      //判断父目录是否已经失效      if (IS_DEADDIR(dir))          return -ENOENT;      //权限检查      return permission(dir,MAY_WRITE | MAY_EXEC, nd); } Mkdir的大体架构就如此了.下面讨论一下rootfs和ext2中的目录创建.   
三:rootfs的目录创建 在前一篇文章分析到.挂载rootfs时,对文件系统根目录的inode.i_op赋值如下: static struct inode_operations ramfs_dir_inode_operations = {      .create       = ramfs_create,      .lookup       = simple_lookup,      .link         = simple_link,      .unlink       = simple_unlink,      .symlink = ramfs_symlink,      .mkdir        = ramfs_mkdir,      .rmdir        = simple_rmdir,      .mknod        = ramfs_mknod,      .rename       = simple_rename, }; 对应的mkdir操作入口是ramfs_mkdir(): static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) {      //创建结点      int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);      //如果创建成功,更新i_nlink计数      if (!retval)          dir->i_nlink++;      return retval; } ramfs_mknod()的代码如下: static int ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) {      //在文件系统中为其分配一个inode      struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev);      int error = -ENOSPC;             if (inode) {          //如果分配成功          if (dir->i_mode & S_ISGID) {               inode->i_gid = dir->i_gid;               if (S_ISDIR(mode))                    inode->i_mode |= S_ISGID;          }          //将dentry与分配的inode关联起来          d_instantiate(dentry, inode);          //增加dentry的引用计数          dget(dentry); /* Extra count - pin the dentry in core */          error = 0;      }      return error; } 这个函数中的子函数我们都在前面已经分析过.请自行查阅本站的其它文档.其操作非常简单。就是分配一个inode。然后将inode 与dentry建立关联.因为rootfs是一个基于RAM的文件系统。其inode的分配就是在内存中创建一个inode空间,然后为其各项操作赋值而已.   
四:ext2中的目录创建 经过上一章的分析可以看到.ext2文件系统根目录的inode.i_op被赋值为ext2_dir_inode_operations.其结构如下所示: struct inode_operations ext2_dir_inode_operations = {      .create       = ext2_create,      .lookup       = ext2_lookup,      .link         = ext2_link,      .unlink       = ext2_unlink,      .symlink = ext2_symlink,      .mkdir        = ext2_mkdir,      .rmdir        = ext2_rmdir,      .mknod        = ext2_mknod,      .rename       = ext2_rename, #ifdef CONFIG_EXT2_FS_XATTR      .setxattr = generic_setxattr,      .getxattr = generic_getxattr,      .listxattr    = ext2_listxattr,      .removexattr  = generic_removexattr, #endif      .setattr = ext2_setattr,      .permission   = ext2_permission, } Mkdir对应的入口为ext2_mkdir().代码如下: static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) {      struct inode * inode;      int err = -EMLINK;        if (dir->i_nlink >= EXT2_LINK_MAX)          goto out;        //增加dir的引用计数,并将其置为"脏"      ext2_inc_count(dir);        //在文件系统中分配一个inode      inode = ext2_new_inode (dir, S_IFDIR | mode);      err = PTR_ERR(inode);      if (IS_ERR(inode))          goto out_dir;        //为inode的各项操作赋值      inode->i_op = &ext2_dir_inode_operations;      inode->i_fop = &ext2_dir_operations;        //为inode对应的i_mapping赋值      if (test_opt(inode->i_sb, NOBH))          inode->i_mapping->a_ops = &ext2_nobh_aops;      else          inode->i_mapping->a_ops = &ext2_aops;        //增加inode的引用计数,并将其置为"脏"      ext2_inc_count(inode);        //对目录结点的初始化      err = ext2_make_empty(inode, dir);      if (err)          goto out_fail;        //更新父目录,使inode加入父目录      err = ext2_add_link(dentry, inode);      if (err)          goto out_fail;        //使dentry和inode建立关联      d_instantiate(dentry, inode); out:      return err;   out_fail:      ext2_dec_count(inode);      ext2_dec_count(inode);      iput(inode); out_dir:      ext2_dec_count(dir);      goto out; } 逐个分析上面所涉及到的子函数. 
在ext2中分配一个inode是由ext2_new_inode()完成的.它的代码如下:   struct inode *ext2_new_inode(struct inode *dir, int mode) {      struct super_block *sb;      struct buffer_head *bitmap_bh = NULL;      struct buffer_head *bh2;      int group, i;      ino_t ino = 0;      struct inode * inode;      struct ext2_group_desc *gdp;      struct ext2_super_block *es;      struct ext2_inode_info *ei;      struct ext2_sb_info *sbi;      int err;        sb = dir->i_sb;      //分配一个inode      inode = new_inode(sb);      if (!inode)          return ERR_PTR(-ENOMEM);        //inode的私有结构      ei = EXT2_I(inode);      //super_block中的ext2私有结构      sbi = EXT2_SB(sb);        //ext2的super_block      es = sbi->s_es;        //寻找一个合适的组来分配inode      if (S_ISDIR(mode)) {          if (test_opt(sb, OLDALLOC))               group = find_group_dir(sb, dir);          else               group = find_group_orlov(sb, dir);      } else          group = find_group_other(sb, dir);        if (group == -1) {          err = -ENOSPC;          goto fail;      }        //遍历组描述符      for (i = 0; i < sbi->s_groups_count; i++) {          //group对应的组开始遍历            //取得组描述符          gdp = ext2_get_group_desc(sb, group, &bh2);            //释放bitmap_bh.已经后面会使用这个临时变量          brelse(bitmap_bh);            //取得组描述符里的inode位图          bitmap_bh = read_inode_bitmap(sb, group);          if (!bitmap_bh) {               err = -EIO;               goto fail;          }          ino = 0;   repeat_in_this_group:          //寻找位图中第一个没有使用的位          ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,                              EXT2_INODES_PER_GROUP(sb), ino);          //如果找到的位大于块组中的inode数.那从group之后的块组中分配          if (ino >= EXT2_INODES_PER_GROUP(sb)) {               /*                * Rare race: find_group_xx() decided that there were                * free inodes in this group, but by the time we tried                * to allocate one, they're all gone.  
This can also                * occur because the counters which find_group_orlov()                * uses are approximate.  So just go and search the                * next block group.                */                //已经到达块组数目最大值。则将其置为零.然后重新循环               if (++group == sbi->s_groups_count)                    group = 0;               continue;          }            //将inode 位图中的分配位置位          if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),                             ino, bitmap_bh->b_data)) {               /* we lost this inode */               //如果该位已经被置位了.说明其它的内核控制路径将其分配了.               //那就找它的下一个没有被使用的inode                 //如果下一个超过了这个组中的最大inode数目。那从下一个块组中分配               if (++ino >= EXT2_INODES_PER_GROUP(sb)) {                    /* this group is exhausted, try next group */                    if (++group == sbi->s_groups_count)                        group = 0;                    continue;               }               /* try to find free inode in the same group */               //重新从块组中寻找没有被使用的inode               goto repeat_in_this_group;          }            //如果运行到这里的话,说明分配成功了          goto got;      }        /*       * Scanned all blockgroups.       
*/      err = -ENOSPC;      goto fail; got:      mark_buffer_dirty(bitmap_bh);      if (sb->s_flags & MS_SYNCHRONOUS)          sync_dirty_buffer(bitmap_bh);      brelse(bitmap_bh);        //将块组中的inode序号转换为全局inode计数      ino += group * EXT2_INODES_PER_GROUP(sb) + 1;        //如果inode序号小于super_block的超始inode序号或者大于inode总数      //出错退出      if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {          ext2_error (sb, "ext2_new_inode",                   "reserved inode or inode > inodes count - "                   "block_group = %d,inode=%lu", group,                   (unsigned long) ino);          err = -EIO;          goto fail;      }        //更新统计计数      percpu_counter_mod(&sbi->s_freeinodes_counter, -1);      if (S_ISDIR(mode))          percpu_counter_inc(&sbi->s_dirs_counter);        spin_lock(sb_bgl_lock(sbi, group));           gdp->bg_free_inodes_count =                 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);        //更新s_debts      if (S_ISDIR(mode)) {          if (sbi->s_debts[group] < 255)               sbi->s_debts[group]++;          gdp->bg_used_dirs_count =               cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);      } else {          if (sbi->s_debts[group])               sbi->s_debts[group]--;      }      spin_unlock(sb_bgl_lock(sbi, group));        sb->s_dirt = 1;      mark_buffer_dirty(bh2);      inode->i_uid = current->fsuid;      if (test_opt (sb, GRPID))          inode->i_gid = dir->i_gid;      else if (dir->i_mode & S_ISGID) {          inode->i_gid = dir->i_gid;          if (S_ISDIR(mode))               mode |= S_ISGID;      } else          inode->i_gid = current->fsgid;      inode->i_mode = mode;        //更新inode表示的索引结点号      inode->i_ino = ino;      inode->i_blksize = PAGE_SIZE;    /* This is the optimal IO size (for stat), not the fs block size */      inode->i_blocks = 0;      //使i_mtine,i_atime,i_ctime置为当前时间      inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;      
memset(ei->i_data, 0, sizeof(ei->i_data));      ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;      if (S_ISLNK(mode))          ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);      /* dirsync is only applied to directories */      if (!S_ISDIR(mode))          ei->i_flags &= ~EXT2_DIRSYNC_FL;      ei->i_faddr = 0;      ei->i_frag_no = 0;      ei->i_frag_size = 0;      ei->i_file_acl = 0;      ei->i_dir_acl = 0;      ei->i_dtime = 0;      ei->i_block_group = group;      ei->i_next_alloc_block = 0;      ei->i_next_alloc_goal = 0;      ei->i_prealloc_block = 0;      ei->i_prealloc_count = 0;      ei->i_dir_start_lookup = 0;      ei->i_state = EXT2_STATE_NEW;      ext2_set_inode_flags(inode);      spin_lock(&sbi->s_next_gen_lock);      inode->i_generation = sbi->s_next_generation++;      spin_unlock(&sbi->s_next_gen_lock);      insert_inode_hash(inode);        if (DQUOT_ALLOC_INODE(inode)) {          DQUOT_DROP(inode);          err = -ENOSPC;          goto fail2;      }      err = ext2_init_acl(inode, dir);      if (err) {          DQUOT_FREE_INODE(inode);          goto fail2;      }      //置inode为“脏”      mark_inode_dirty(inode);      ext2_debug("allocating inode %lu\n", inode->i_ino);      ext2_preread_inode(inode);      return inode;   fail2:      inode->i_flags |= S_NOQUOTA;      inode->i_nlink = 0;      iput(inode);      return ERR_PTR(err);   fail:      make_bad_inode(inode);      iput(inode);      return ERR_PTR(err); } 查找一个未使用的索引结点有一个规则,就是尽量使每个块组达到平衡.所以linux在ext2_sb_info结构中加了一个s_debts字段.用来表示每个块组中的文件与目录的分配情况.计算的方法是在find_group_orlov(目录)和find_group_other(其它类型的文件)中完成的. 
每个目录都包含两个特殊的目录项 “.”和 “..”.单点代表其本身,双点代表父目录.这个过程是在ext2_make_empty()中完成的.对应代码如下: int ext2_make_empty(struct inode *inode, struct inode *parent) {      struct address_space *mapping = inode->i_mapping;      //找到页面映射所代表的首个页面      struct page *page = grab_cache_page(mapping, 0);      unsigned chunk_size = ext2_chunk_size(inode);      struct ext2_dir_entry_2 * de;      int err;      void *kaddr;        if (!page)          return -ENOMEM;        //先调用prepare_write().因为之后会将page写到文件系统中去      err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);      if (err) {          unlock_page(page);          goto fail;      }      //将page临时映射到内核      kaddr = kmap_atomic(page, KM_USER0);        //目录中的第一个文件对象      de = (struct ext2_dir_entry_2 *)kaddr;      //每个目录中都有两个默认存在的对象.和..        //将'.'加至目录中,其inode结点号指向其本身      de->name_len = 1;      de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));      memcpy (de->name, ".\0\0", 4);      de->inode = cpu_to_le32(inode->i_ino);      ext2_set_de_type (de, inode);        //设置'..'.使其指向父目录      de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));      de->name_len = 2;      de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));      de->inode = cpu_to_le32(parent->i_ino);      memcpy (de->name, "..\0", 4);      ext2_set_de_type (de, inode);        //释放掉映射区间      kunmap_atomic(kaddr, KM_USER0);      //将更改的页面提交到文件系统      err = ext2_commit_chunk(page, 0, chunk_size); fail:      //页面使用完了,减少其使用计数      page_cache_release(page);      return err; } 初始完成之后,要将子目录插入父目录所代表的空间中。它是由ext2_add_link()完成的。代码如下: int ext2_add_link (struct dentry *dentry, struct inode *inode) {      //得到父目录的inode      struct inode *dir = dentry->d_parent->d_inode;      const char *name = dentry->d_name.name;      int namelen = dentry->d_name.len;      unsigned chunk_size = ext2_chunk_size(dir);      unsigned reclen = EXT2_DIR_REC_LEN(namelen);      unsigned short rec_len, name_len;      struct page *page = NULL;      ext2_dirent * de;        //父目录结点大小所占的页面数      
unsigned long npages = dir_pages(dir);      unsigned long n;      char *kaddr;      unsigned from, to;      int err;        /*       * We take care of directory expansion in the same loop.       * This code plays outside i_size, so it locks the page       * to protect that region.       */        //遍历结点所在的空间      for (n = 0; n <= npages; n++) {          char *dir_end;            page = ext2_get_page(dir, n);          err = PTR_ERR(page);          if (IS_ERR(page))               goto out;          lock_page(page);          kaddr = page_address(page);            //本页面的最后的位置.                   //ext2_last_byte: 如果剩余的长度大于一个页面,则返回一个页面大小.否则返回剩余空间大小          dir_end = kaddr + ext2_last_byte(dir, n);          de = (ext2_dirent *)kaddr;          kaddr += PAGE_CACHE_SIZE - reclen;          while ((char *)de <= kaddr) {               //到了结点空间的末尾               if ((char *)de == dir_end) {                    /* We hit i_size */                    name_len = 0;                    rec_len = chunk_size;                    de->rec_len = cpu_to_le16(chunk_size);                    de->inode = 0;                    goto got_it;               }                 //目录中文件所占空间长度为0.非法               if (de->rec_len == 0) {                    ext2_error(dir->i_sb, __FUNCTION__,                        "zero-length directory entry");                    err = -EIO;                    goto out_unlock;               }               err = -EEXIST;               //在目录所包含的文件中,含有同名的结点               if (ext2_match (namelen, name, de))                    goto out_unlock;               name_len = EXT2_DIR_REC_LEN(de->name_len);               rec_len = le16_to_cpu(de->rec_len);               //de->inode==0.表示目录中的此结点被删除               //rec_len >= reclen:表示旧结点中有足够的空间存储新的结点               if (!de->inode && rec_len >= reclen)                    goto got_it;               //这个结点中有空间剩余.(可能是它后面有节点被删除造成的)               if (rec_len >= name_len + reclen)                    goto got_it;               de = 
(ext2_dirent *) ((char *) de + rec_len);          }          unlock_page(page);          ext2_put_page(page);      }      BUG();      return -EINVAL;   got_it:      from = (char*)de - (char*)page_address(page);      to = from + rec_len;      err = page->mapping->a_ops->prepare_write(NULL, page, from, to);      if (err)          goto out_unlock;      if (de->inode) {          //这是属于结点空间有剩余的情况          //即在空间中插入一个新的结点          ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);          de1->rec_len = cpu_to_le16(rec_len - name_len);          de->rec_len = cpu_to_le16(name_len);          de = de1;      }      //对目录的相关项进行赋值      de->name_len = namelen;      memcpy (de->name, name, namelen);      de->inode = cpu_to_le32(inode->i_ino);      ext2_set_de_type (de, inode);      //提交所做的修改,将其写入文件系统      err = ext2_commit_chunk(page, from, to);      //更改时间戳      dir->i_mtime = dir->i_ctime = CURRENT_TIME;      EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;      mark_inode_dirty(dir);      /* OFFSET_CACHE */ out_put:      ext2_put_page(page); out:      return err; out_unlock:      unlock_page(page);      goto out_put; } 在这里,忽略了页面映射与文件系统驱动的交互过程。关于页面缓存后续再给出章节进行分析.   五:小结 在这一节里,以rootfs和ext2文件系统为例分析了目录的建立过程.只要对ext2文件系统的相关部分有所了解.理解这部份代码并不难.其中关于页面缓存部份以后再给出专题分析.详情请关注本站更新.
相关阅读 更多 +
排行榜 更多 +
马里奥赛车世界游戏手机版下载

马里奥赛车世界游戏手机版下载

赛车竞速 下载
无畏契约皮肤开箱器手游下载

无畏契约皮肤开箱器手游下载

休闲益智 下载
旭日之城官方正版下载

旭日之城官方正版下载

策略塔防 下载