在《Linux内核源代码情景分析-从路径名到目标节点》一文的path_walk代码中,err = permission(inode, MAY_EXEC)用于检查当前进程是否可以访问这个节点,代码如下:
/*
 * permission - check whether the requested access to an inode is allowed.
 * @inode: inode being accessed
 * @mask:  requested access, a combination of MAY_EXEC / MAY_WRITE / MAY_READ
 *
 * If the inode's operation table supplies its own permission() method, that
 * method is called with the big kernel lock held and its result is returned.
 * Otherwise the decision is delegated to vfs_permission().
 */
int permission(struct inode * inode,int mask)
{
	int retval;

	/* No inode-specific hook: fall back to the generic check. */
	if (!inode->i_op || !inode->i_op->permission)
		return vfs_permission(inode, mask);

	lock_kernel();
	retval = inode->i_op->permission(inode, mask);
	unlock_kernel();

	return retval;
}
在ext2_read_inode中,i_op可以设置为ext2_file_inode_operations,ext2_dir_inode_operations,ext2_fast_symlink_inode_operations,page_symlink_inode_operations,均没有permission指针。所以执行vfs_permission,代码如下:
/*
 * vfs_permission - generic mode-bit based access check.
 * @inode: inode being accessed
 * @mask:  requested access (MAY_EXEC / MAY_WRITE / MAY_READ); these values
 *         equal S_IXOTH / S_IWOTH / S_IROTH, so the mask can be compared
 *         directly against the low three bits of the mode.
 *
 * Returns 0 when access is granted, -EROFS for a write request to a regular
 * file, directory or symlink on a read-only filesystem, and -EACCES in every
 * other refusal case.
 */
int vfs_permission(struct inode * inode,int mask)
{
	int perm_bits = inode->i_mode;

	if (mask & S_IWOTH) {
		/*
		 * Writing was requested: refuse it for regular files,
		 * directories and symlinks that live on a read-only filesystem.
		 */
		if (IS_RDONLY(inode) &&
		    (S_ISREG(perm_bits) || S_ISDIR(perm_bits) || S_ISLNK(perm_bits)))
			return -EROFS;

		/* Nobody gets write access to an immutable file. */
		if (IS_IMMUTABLE(inode))
			return -EACCES;
	}

	/*
	 * Move the applicable rwx triplet into the low three bits:
	 * the owner bits when fsuid matches the inode's uid, the group bits
	 * when in_group_p() accepts the inode's gid, otherwise the "other"
	 * bits are already in place.
	 */
	if (current->fsuid == inode->i_uid)
		perm_bits >>= 6;
	else if (in_group_p(inode->i_gid))
		perm_bits >>= 3;

	/* Every requested bit is present in the selected triplet. */
	if ((perm_bits & mask & S_IRWXO) == mask)
		return 0;

	/*
	 * The mode bits say no, but a process holding CAP_DAC_OVERRIDE may
	 * bypass the discretionary access control check altogether.
	 */
	if (capable(CAP_DAC_OVERRIDE))
		return 0;

	/*
	 * Read and search access: a pure read request, or a request on a
	 * directory that asks for nothing beyond read/search, may still be
	 * granted through CAP_DAC_READ_SEARCH.
	 */
	if (mask == S_IROTH ||
	    (S_ISDIR(inode->i_mode) && (mask & ~(S_IROTH | S_IXOTH)) == 0)) {
		if (capable(CAP_DAC_READ_SEARCH))
			return 0;
	}

	return -EACCES;
}
其中mask为:
/*
 * Access-request bits passed as the `mask` argument of permission() and
 * vfs_permission(). Their values equal S_IXOTH, S_IWOTH and S_IROTH, which
 * is why vfs_permission() can test the mask against S_I?OTH directly.
 */
#define MAY_EXEC 1 /* execute access requested */
#define MAY_WRITE 2 /* write access requested */
#define MAY_READ 4 /* read access requested */
inode->i_mode为:
其中低9位用于表示三类用户(文件主、同组用户、其他用户)各自的读、写、执行访问权限:
#define S_IRWXU 00700
#define S_IRUSR 00400
#define S_IWUSR 00200
#define S_IXUSR 00100
#define S_IRWXG 00070
#define S_IRGRP 00040
#define S_IWGRP 00020
#define S_IXGRP 00010
#define S_IRWXO 00007
#define S_IROTH 00004
#define S_IWOTH 00002
#define S_IXOTH 00001
参考这张图就好理解了。
还有三个标志位,是一个状态占一个位:
/*
 * Special mode flags, one bit each, located just above the nine rwx bits.
 * Each directive must sit on its own line: a #define swallows the rest of
 * its line, so fusing them would leave S_ISGID and S_ISVTX undefined.
 */
#define S_ISUID 0004000 /* set-user-ID on execution */
#define S_ISGID 0002000 /* set-group-ID on execution */
#define S_ISVTX 0001000 /* sticky bit */
至此16位中只剩下4位,用来表示文件类型。如果为每种文件类型各分配一个标志位,4位是不够用的,所以这4位采用编码的方式来表示文件类型,并用S_IFMT作为掩码取出:
/*
 * File-type field of the mode. The four bits selected by S_IFMT hold a
 * single encoded type value rather than independent flags, so a type is
 * tested by comparing (mode & S_IFMT) against one of the S_IF* codes.
 */
#define S_IFMT 00170000 /* mask of the file-type field */
#define S_IFSOCK 0140000 /* socket */
#define S_IFLNK 0120000 /* symbolic link */
#define S_IFREG 0100000 /* regular file */
#define S_IFBLK 0060000 /* block device */
#define S_IFDIR 0040000 /* directory */
#define S_IFCHR 0020000 /* character device */
#define S_IFIFO 0010000 /* FIFO (named pipe) */
capable,代码如下:
static inline int capable(int cap) { #if 1 /* ok now */ if (cap_raised(current->cap_effective, cap)) #else if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0) #endif { current->flags |= PF_SUPERPRIV; return 1; } return 0; } #define cap_raised(c, flag) (cap_t(c) & CAP_TO_MASK(flag)) #define cap_t(x) (x) #define CAP_TO_MASK(x) (1 << (x))
current->cap_effective来源于bprm->cap_effective,而后者是在sys_execve->do_execve->prepare_binprm中初始设置的(装入新程序时再传递给当前进程)。
int prepare_binprm(struct linux_binprm *bprm)
{
int mode;
struct inode * inode = bprm->file->f_dentry->d_inode;
mode = inode->i_mode;
/* Huh? We had already checked for MAY_EXEC, WTF do we check this? */
if (!(mode & 0111))/* with at least _one_ execute bit set */
return -EACCES;
if (bprm->file->f_op == NULL)
return -EACCES;
bprm->e_uid = current->euid;
bprm->e_gid = current->egid;
if(!IS_NOSUID(inode)) {
/* Set-uid? */
if (mode & S_ISUID)
bprm->e_uid = inode->i_uid;
/* Set-gid? */
/*
* If setgid is set but no group execute bit then this
* is a candidate for mandatory locking, not a setgid
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
bprm->e_gid = inode->i_gid;
}
/* We don't have VFS support for capabilities yet */
cap_clear(bprm->cap_inheritable);
cap_clear(bprm->cap_permitted);
cap_clear(bprm->cap_effective);
/* To support inheritance of root-permissions and suid-root
* executables under compatibility mode, we raise all three
* capability sets for the file.
*
* If only the real uid is 0, we only raise the inheritable
* and permitted sets of the executable file.
*/
if (!issecure(SECURE_NOROOT)) {
if (bprm->e_uid == 0 || current->uid == 0) {
cap_set_full(bprm->cap_inheritable);
cap_set_full(bprm->cap_permitted);
}
if (bprm->e_uid == 0)
cap_set_full(bprm->cap_effective);//这里设置的
}
memset(bprm->buf,0,BINPRM_BUF_SIZE);
return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);