#include <linux/btrfs.h> #include <linux/capability.h> #include <linux/cred.h> #include <linux/mount.h> #include <linux/fdtable.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/magic.h> #include <linux/parser.h> #include <linux/security.h> #include <linux/seq_file.h> #include <linux/statfs.h> #include <linux/slab.h> #include <linux/user_namespace.h> #include <linux/uidgid.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/uio.h> struct shiftfs_super_info { struct vfsmount *mnt; struct user_namespace *userns; /* creds of process who created the super block */ const struct cred *creator_cred; bool mark; unsigned int passthrough; unsigned int passthrough_mark; }; static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, umode_t mode, dev_t dev, struct dentry *dentry); #define SHIFTFS_PASSTHROUGH_NONE 0 #define SHIFTFS_PASSTHROUGH_STAT 1 #define SHIFTFS_PASSTHROUGH_IOCTL 2 #define SHIFTFS_PASSTHROUGH_ALL \ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL) static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info) { if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) return false; return true; } static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info) { if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT)) return false; return true; } enum { OPT_MARK, OPT_PASSTHROUGH, OPT_LAST, }; /* global filesystem options */ static const match_table_t tokens = { { OPT_MARK, "mark" }, { OPT_PASSTHROUGH, "passthrough=%u" }, { OPT_LAST, NULL } }; static const struct cred *shiftfs_override_creds(const struct super_block *sb) { struct shiftfs_super_info *sbinfo = sb->s_fs_info; return override_creds(sbinfo->creator_cred); } static inline void shiftfs_revert_object_creds(const struct cred *oldcred, struct cred *newcred) { revert_creds(oldcred); put_cred(newcred); } static kuid_t 
shift_kuid(struct user_namespace *from, struct user_namespace *to, kuid_t kuid) { uid_t uid = from_kuid(from, kuid); return make_kuid(to, uid); } static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to, kgid_t kgid) { gid_t gid = from_kgid(from, kgid); return make_kgid(to, gid); } static int shiftfs_override_object_creds(const struct super_block *sb, const struct cred **oldcred, struct cred **newcred, struct dentry *dentry, umode_t mode, bool hardlink) { struct shiftfs_super_info *sbinfo = sb->s_fs_info; kuid_t fsuid = current_fsuid(); kgid_t fsgid = current_fsgid(); *oldcred = shiftfs_override_creds(sb); *newcred = prepare_creds(); if (!*newcred) { revert_creds(*oldcred); return -ENOMEM; } (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid); (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid); if (!hardlink) { int err = security_dentry_create_files_as(dentry, mode, &dentry->d_name, *oldcred, *newcred); if (err) { shiftfs_revert_object_creds(*oldcred, *newcred); return err; } } put_cred(override_creds(*newcred)); return 0; } static void shiftfs_copyattr(struct inode *from, struct inode *to) { struct user_namespace *from_ns = from->i_sb->s_user_ns; struct user_namespace *to_ns = to->i_sb->s_user_ns; to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid); to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid); to->i_mode = from->i_mode; to->i_atime = from->i_atime; to->i_mtime = from->i_mtime; to->i_ctime = from->i_ctime; i_size_write(to, i_size_read(from)); } static void shiftfs_copyflags(struct inode *from, struct inode *to) { unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; inode_set_flags(to, from->i_flags & mask, mask); } static void shiftfs_file_accessed(struct file *file) { struct inode *upperi, *loweri; if (file->f_flags & O_NOATIME) return; upperi = file_inode(file); loweri = upperi->i_private; if (!loweri) return; upperi->i_mtime = loweri->i_mtime; upperi->i_ctime = loweri->i_ctime; 
touch_atime(&file->f_path); } static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo, char *options) { char *p; substring_t args[MAX_OPT_ARGS]; sbinfo->mark = false; sbinfo->passthrough = 0; while ((p = strsep(&options, ",")) != NULL) { int err, intarg, token; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case OPT_MARK: sbinfo->mark = true; break; case OPT_PASSTHROUGH: err = match_int(&args[0], &intarg); if (err) return err; if (intarg & ~SHIFTFS_PASSTHROUGH_ALL) return -EINVAL; sbinfo->passthrough = intarg; break; default: return -EINVAL; } } return 0; } static void shiftfs_d_release(struct dentry *dentry) { struct dentry *lowerd = dentry->d_fsdata; if (lowerd) dput(lowerd); } static struct dentry *shiftfs_d_real(struct dentry *dentry, const struct inode *inode) { struct dentry *lowerd = dentry->d_fsdata; if (inode && d_inode(dentry) == inode) return dentry; lowerd = d_real(lowerd, inode); if (lowerd && (!inode || inode == d_inode(lowerd))) return lowerd; WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, inode ? inode->i_sb->s_id : "NULL", inode ? 
inode->i_ino : 0); return dentry; } static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags) { int err = 1; struct dentry *lowerd = dentry->d_fsdata; if (d_is_negative(lowerd) != d_is_negative(dentry)) return 0; if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE)) err = lowerd->d_op->d_weak_revalidate(lowerd, flags); if (d_really_is_positive(dentry)) { struct inode *inode = d_inode(dentry); struct inode *loweri = d_inode(lowerd); shiftfs_copyattr(loweri, inode); } return err; } static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags) { int err = 1; struct dentry *lowerd = dentry->d_fsdata; if (d_unhashed(lowerd) || ((d_is_negative(lowerd) != d_is_negative(dentry)))) return 0; if (flags & LOOKUP_RCU) return -ECHILD; if ((lowerd->d_flags & DCACHE_OP_REVALIDATE)) err = lowerd->d_op->d_revalidate(lowerd, flags); if (d_really_is_positive(dentry)) { struct inode *inode = d_inode(dentry); struct inode *loweri = d_inode(lowerd); shiftfs_copyattr(loweri, inode); } return err; } static const struct dentry_operations shiftfs_dentry_ops = { .d_release = shiftfs_d_release, .d_real = shiftfs_d_real, .d_revalidate = shiftfs_d_revalidate, .d_weak_revalidate = shiftfs_d_weak_revalidate, }; static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { const char *p; const struct cred *oldcred; struct dentry *lowerd; /* RCU lookup not supported */ if (!dentry) return ERR_PTR(-ECHILD); lowerd = dentry->d_fsdata; oldcred = shiftfs_override_creds(dentry->d_sb); p = vfs_get_link(lowerd, done); revert_creds(oldcred); return p; } static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct dentry *lowerd = dentry->d_fsdata; int err; const struct cred *oldcred; oldcred = shiftfs_override_creds(dentry->d_sb); err = vfs_setxattr(lowerd, name, value, size, flags); revert_creds(oldcred); shiftfs_copyattr(lowerd->d_inode, inode); 
return err; } static int shiftfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { struct dentry *lowerd = dentry->d_fsdata; int err; const struct cred *oldcred; oldcred = shiftfs_override_creds(dentry->d_sb); err = vfs_getxattr(lowerd, name, value, size); revert_creds(oldcred); return err; } static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *lowerd = dentry->d_fsdata; int err; const struct cred *oldcred; oldcred = shiftfs_override_creds(dentry->d_sb); err = vfs_listxattr(lowerd, list, size); revert_creds(oldcred); return err; } static int shiftfs_removexattr(struct dentry *dentry, const char *name) { struct dentry *lowerd = dentry->d_fsdata; int err; const struct cred *oldcred; oldcred = shiftfs_override_creds(dentry->d_sb); err = vfs_removexattr(lowerd, name); revert_creds(oldcred); /* update c/mtime */ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry)); return err; } static int shiftfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (!value) return shiftfs_removexattr(dentry, name); return shiftfs_setxattr(dentry, inode, name, value, size, flags); } static int shiftfs_inode_test(struct inode *inode, void *data) { return inode->i_private == data; } static int shiftfs_inode_set(struct inode *inode, void *data) { inode->i_private = data; return 0; } static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, umode_t mode, const char *symlink, struct dentry *hardlink, bool excl) { int err; const struct cred *oldcred; struct cred *newcred; void *loweri_iop_ptr = NULL; umode_t modei = mode; struct super_block *dir_sb = diri->i_sb; struct dentry *lowerd_new = dentry->d_fsdata; struct inode *inode = NULL, *loweri_dir = diri->i_private; const struct inode_operations *loweri_dir_iop = loweri_dir->i_op; struct dentry 
*lowerd_link = NULL; inode_lock_nested(loweri_dir, I_MUTEX_PARENT); if (hardlink) { loweri_iop_ptr = loweri_dir_iop->link; } else { switch (mode & S_IFMT) { case S_IFDIR: loweri_iop_ptr = loweri_dir_iop->mkdir; break; case S_IFREG: loweri_iop_ptr = loweri_dir_iop->create; break; case S_IFLNK: loweri_iop_ptr = loweri_dir_iop->symlink; break; case S_IFSOCK: /* fall through */ case S_IFIFO: loweri_iop_ptr = loweri_dir_iop->mknod; break; } } if (!loweri_iop_ptr) { err = -EINVAL; goto out_iput; } if (!hardlink) { inode = new_inode(dir_sb); if (!inode) { err = -ENOMEM; goto out_iput; } /* * new_inode() will have added the new inode to the super * block's list of inodes. Further below we will call * inode_insert5() Which would perform the same operation again * thereby corrupting the list. To avoid this raise I_CREATING * in i_state which will cause inode_insert5() to skip this * step. I_CREATING will be cleared by d_instantiate_new() * below. */ spin_lock(&inode->i_lock); inode->i_state |= I_CREATING; spin_unlock(&inode->i_lock); inode_init_owner(inode, diri, mode); modei = inode->i_mode; } err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred, dentry, modei, hardlink != NULL); if (err) goto out_iput; if (hardlink) { lowerd_link = hardlink->d_fsdata; err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL); } else { switch (modei & S_IFMT) { case S_IFDIR: err = vfs_mkdir(loweri_dir, lowerd_new, modei); break; case S_IFREG: err = vfs_create(loweri_dir, lowerd_new, modei, excl); break; case S_IFLNK: err = vfs_symlink(loweri_dir, lowerd_new, symlink); break; case S_IFSOCK: /* fall through */ case S_IFIFO: err = vfs_mknod(loweri_dir, lowerd_new, modei, 0); break; default: err = -EINVAL; break; } } shiftfs_revert_object_creds(oldcred, newcred); if (!err && WARN_ON(!lowerd_new->d_inode)) err = -EIO; if (err) goto out_iput; if (hardlink) { inode = d_inode(hardlink); ihold(inode); /* copy up times from lower inode */ shiftfs_copyattr(d_inode(lowerd_link), inode); 
set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink); d_instantiate(dentry, inode); } else { struct inode *inode_tmp; struct inode *loweri_new = d_inode(lowerd_new); inode_tmp = inode_insert5(inode, (unsigned long)loweri_new, shiftfs_inode_test, shiftfs_inode_set, loweri_new); if (unlikely(inode_tmp != inode)) { pr_err_ratelimited("shiftfs: newly created inode found in cache\n"); iput(inode_tmp); err = -EINVAL; goto out_iput; } ihold(loweri_new); shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode, 0, lowerd_new); d_instantiate_new(dentry, inode); } shiftfs_copyattr(loweri_dir, diri); if (loweri_iop_ptr == loweri_dir_iop->mkdir) set_nlink(diri, loweri_dir->i_nlink); inode = NULL; out_iput: iput(inode); inode_unlock(loweri_dir); return err; } static int shiftfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { mode |= S_IFREG; return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl); } static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { mode |= S_IFDIR; return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); } static int shiftfs_link(struct dentry *hardlink, struct inode *dir, struct dentry *dentry) { return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false); } static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { if (!S_ISFIFO(mode) && !S_ISSOCK(mode)) return -EPERM; return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); } static int shiftfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink) { return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false); } static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) { struct dentry *lowerd = dentry->d_fsdata; struct inode *loweri = dir->i_private; struct inode *inode = d_inode(dentry); int err; const struct cred *oldcred; dget(lowerd); oldcred = shiftfs_override_creds(dentry->d_sb); inode_lock_nested(loweri, 
I_MUTEX_PARENT); if (rmdir) err = vfs_rmdir(loweri, lowerd); else err = vfs_unlink(loweri, lowerd, NULL); revert_creds(oldcred); if (!err) { d_drop(dentry); if (rmdir) clear_nlink(inode); else drop_nlink(inode); } inode_unlock(loweri); shiftfs_copyattr(loweri, dir); dput(lowerd); return err; } static int shiftfs_unlink(struct inode *dir, struct dentry *dentry) { return shiftfs_rm(dir, dentry, false); } static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry) { return shiftfs_rm(dir, dentry, true); } static int shiftfs_rename(struct inode *olddir, struct dentry *old, struct inode *newdir, struct dentry *new, unsigned int flags) { struct dentry *lowerd_dir_old = old->d_parent->d_fsdata, *lowerd_dir_new = new->d_parent->d_fsdata, *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata, *trapd; struct inode *loweri_dir_old = lowerd_dir_old->d_inode, *loweri_dir_new = lowerd_dir_new->d_inode; int err = -EINVAL; const struct cred *oldcred; trapd = lock_rename(lowerd_dir_new, lowerd_dir_old); if (trapd == lowerd_old || trapd == lowerd_new) goto out_unlock; oldcred = shiftfs_override_creds(old->d_sb); err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new, NULL, flags); revert_creds(oldcred); shiftfs_copyattr(loweri_dir_old, olddir); shiftfs_copyattr(loweri_dir_new, newdir); out_unlock: unlock_rename(lowerd_dir_new, lowerd_dir_old); return err; } static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct dentry *new; struct inode *newi; const struct cred *oldcred; struct dentry *lowerd = dentry->d_parent->d_fsdata; struct inode *inode = NULL, *loweri = lowerd->d_inode; inode_lock(loweri); oldcred = shiftfs_override_creds(dentry->d_sb); new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len); revert_creds(oldcred); inode_unlock(loweri); if (IS_ERR(new)) return new; dentry->d_fsdata = new; newi = new->d_inode; if (!newi) goto out; inode = iget5_locked(dentry->d_sb, (unsigned long)newi, 
shiftfs_inode_test, shiftfs_inode_set, newi); if (!inode) { dput(new); return ERR_PTR(-ENOMEM); } if (inode->i_state & I_NEW) { /* * inode->i_private set by shiftfs_inode_set(), but we still * need to take a reference */ ihold(newi); shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new); unlock_new_inode(inode); } out: return d_splice_alias(inode, dentry); } static int shiftfs_permission(struct inode *inode, int mask) { int err; const struct cred *oldcred; struct inode *loweri = inode->i_private; if (!loweri) { WARN_ON(!(mask & MAY_NOT_BLOCK)); return -ECHILD; } err = generic_permission(inode, mask); if (err) return err; oldcred = shiftfs_override_creds(inode->i_sb); err = inode_permission(loweri, mask); revert_creds(oldcred); return err; } static int shiftfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int err; const struct cred *oldcred; struct inode *loweri = inode->i_private; if (!loweri->i_op->fiemap) return -EOPNOTSUPP; oldcred = shiftfs_override_creds(inode->i_sb); if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) filemap_write_and_wait(loweri->i_mapping); err = loweri->i_op->fiemap(loweri, fieinfo, start, len); revert_creds(oldcred); return err; } static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { int err; const struct cred *oldcred; struct dentry *lowerd = dentry->d_fsdata; struct inode *loweri = dir->i_private; if (!loweri->i_op->tmpfile) return -EOPNOTSUPP; oldcred = shiftfs_override_creds(dir->i_sb); err = loweri->i_op->tmpfile(loweri, lowerd, mode); revert_creds(oldcred); return err; } static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) { struct dentry *lowerd = dentry->d_fsdata; struct inode *loweri = lowerd->d_inode; struct iattr newattr; const struct cred *oldcred; struct super_block *sb = dentry->d_sb; struct shiftfs_super_info *sbinfo = sb->s_fs_info; int err; err = setattr_prepare(dentry, attr); if (err) return err; newattr = *attr; newattr.ia_uid = 
shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid); newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid); /* * mode change is for clearing setuid/setgid bits. Allow lower fs * to interpret this in its own way. */ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) newattr.ia_valid &= ~ATTR_MODE; inode_lock(loweri); oldcred = shiftfs_override_creds(dentry->d_sb); err = notify_change(lowerd, &newattr, NULL); revert_creds(oldcred); inode_unlock(loweri); shiftfs_copyattr(loweri, d_inode(dentry)); return err; } static int shiftfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = path->dentry->d_inode; struct dentry *lowerd = path->dentry->d_fsdata; struct inode *loweri = lowerd->d_inode; struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info; struct path newpath = { .mnt = info->mnt, .dentry = lowerd }; struct user_namespace *from_ns = loweri->i_sb->s_user_ns; struct user_namespace *to_ns = inode->i_sb->s_user_ns; const struct cred *oldcred; int err; oldcred = shiftfs_override_creds(inode->i_sb); err = vfs_getattr(&newpath, stat, request_mask, query_flags); revert_creds(oldcred); if (err) return err; /* transform the underlying id */ stat->uid = shift_kuid(from_ns, to_ns, stat->uid); stat->gid = shift_kgid(from_ns, to_ns, stat->gid); return 0; } #ifdef CONFIG_SHIFT_FS_POSIX_ACL static int shift_acl_ids(struct user_namespace *from, struct user_namespace *to, struct posix_acl *acl) { int i; for (i = 0; i < acl->a_count; i++) { struct posix_acl_entry *e = &acl->a_entries[i]; switch(e->e_tag) { case ACL_USER: e->e_uid = shift_kuid(from, to, e->e_uid); if (!uid_valid(e->e_uid)) return -EOVERFLOW; break; case ACL_GROUP: e->e_gid = shift_kgid(from, to, e->e_gid); if (!gid_valid(e->e_gid)) return -EOVERFLOW; break; } } return 0; } static void shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to, void *value, size_t size) { struct 
posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; int count; kuid_t kuid; kgid_t kgid; if (!value) return; if (size < sizeof(struct posix_acl_xattr_header)) return; if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) return; count = posix_acl_xattr_count(size); if (count < 0) return; if (count == 0) return; for (end = entry + count; entry != end; entry++) { switch(le16_to_cpu(entry->e_tag)) { case ACL_USER: kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); kuid = shift_kuid(from, to, kuid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid)); break; case ACL_GROUP: kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); kgid = shift_kgid(from, to, kgid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid)); break; default: break; } } } static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type) { struct inode *loweri = inode->i_private; const struct cred *oldcred; struct posix_acl *lower_acl, *acl = NULL; struct user_namespace *from_ns = loweri->i_sb->s_user_ns; struct user_namespace *to_ns = inode->i_sb->s_user_ns; int size; int err; if (!IS_POSIXACL(loweri)) return NULL; oldcred = shiftfs_override_creds(inode->i_sb); lower_acl = get_acl(loweri, type); revert_creds(oldcred); if (lower_acl && !IS_ERR(lower_acl)) { /* XXX: export posix_acl_clone? 
*/ size = sizeof(struct posix_acl) + lower_acl->a_count * sizeof(struct posix_acl_entry); acl = kmemdup(lower_acl, size, GFP_KERNEL); posix_acl_release(lower_acl); if (!acl) return ERR_PTR(-ENOMEM); refcount_set(&acl->a_refcount, 1); err = shift_acl_ids(from_ns, to_ns, acl); if (err) { kfree(acl); return ERR_PTR(err); } } return acl; } static int shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { struct inode *loweri = inode->i_private; int ret; ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name, buffer, size); if (ret < 0) return ret; inode_lock(loweri); shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns, buffer, size); inode_unlock(loweri); return ret; } static int shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct inode *loweri = inode->i_private; int err; if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl) return -EOPNOTSUPP; if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return value ? 
-EACCES : 0; if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { shift_acl_xattr_ids(inode->i_sb->s_user_ns, loweri->i_sb->s_user_ns, (void *)value, size); err = shiftfs_setxattr(dentry, inode, handler->name, value, size, flags); } else { err = shiftfs_removexattr(dentry, handler->name); } if (!err) shiftfs_copyattr(loweri, inode); return err; } static const struct xattr_handler shiftfs_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .get = shiftfs_posix_acl_xattr_get, .set = shiftfs_posix_acl_xattr_set, }; static const struct xattr_handler shiftfs_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .get = shiftfs_posix_acl_xattr_get, .set = shiftfs_posix_acl_xattr_set, }; #else /* !CONFIG_SHIFT_FS_POSIX_ACL */ #define shiftfs_get_acl NULL #endif /* CONFIG_SHIFT_FS_POSIX_ACL */ static const struct inode_operations shiftfs_dir_inode_operations = { .lookup = shiftfs_lookup, .mkdir = shiftfs_mkdir, .symlink = shiftfs_symlink, .unlink = shiftfs_unlink, .rmdir = shiftfs_rmdir, .rename = shiftfs_rename, .link = shiftfs_link, .setattr = shiftfs_setattr, .create = shiftfs_create, .mknod = shiftfs_mknod, .permission = shiftfs_permission, .getattr = shiftfs_getattr, .listxattr = shiftfs_listxattr, .get_acl = shiftfs_get_acl, }; static const struct inode_operations shiftfs_file_inode_operations = { .fiemap = shiftfs_fiemap, .getattr = shiftfs_getattr, .get_acl = shiftfs_get_acl, .listxattr = shiftfs_listxattr, .permission = shiftfs_permission, .setattr = shiftfs_setattr, .tmpfile = shiftfs_tmpfile, }; static const struct inode_operations shiftfs_special_inode_operations = { .getattr = shiftfs_getattr, .get_acl = shiftfs_get_acl, .listxattr = shiftfs_listxattr, .permission = shiftfs_permission, .setattr = shiftfs_setattr, }; static const struct inode_operations shiftfs_symlink_inode_operations = { .getattr = shiftfs_getattr, .get_link = shiftfs_get_link, .listxattr = 
shiftfs_listxattr, .setattr = shiftfs_setattr, }; static struct file *shiftfs_open_realfile(const struct file *file, struct inode *realinode) { struct file *realfile; const struct cred *old_cred; struct inode *inode = file_inode(file); struct dentry *lowerd = file->f_path.dentry->d_fsdata; struct shiftfs_super_info *info = inode->i_sb->s_fs_info; struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; old_cred = shiftfs_override_creds(inode->i_sb); realfile = open_with_fake_path(&realpath, file->f_flags, realinode, info->creator_cred); revert_creds(old_cred); return realfile; } #define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) static int shiftfs_change_flags(struct file *file, unsigned int flags) { struct inode *inode = file_inode(file); int err; /* if some flag changed that cannot be changed then something's amiss */ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK)) return -EIO; flags &= SHIFTFS_SETFL_MASK; if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; if (flags & O_DIRECT) { if (!file->f_mapping->a_ops || !file->f_mapping->a_ops->direct_IO) return -EINVAL; } if (file->f_op->check_flags) { err = file->f_op->check_flags(flags); if (err) return err; } spin_lock(&file->f_lock); file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags; spin_unlock(&file->f_lock); return 0; } static int shiftfs_open(struct inode *inode, struct file *file) { struct file *realfile; realfile = shiftfs_open_realfile(file, inode->i_private); if (IS_ERR(realfile)) return PTR_ERR(realfile); file->private_data = realfile; /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. 
*/ file->f_mapping = realfile->f_mapping; return 0; } static int shiftfs_dir_open(struct inode *inode, struct file *file) { struct file *realfile; const struct cred *oldcred; struct dentry *lowerd = file->f_path.dentry->d_fsdata; struct shiftfs_super_info *info = inode->i_sb->s_fs_info; struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); realfile = dentry_open(&realpath, file->f_flags | O_NOATIME, info->creator_cred); revert_creds(oldcred); if (IS_ERR(realfile)) return PTR_ERR(realfile); file->private_data = realfile; return 0; } static int shiftfs_release(struct inode *inode, struct file *file) { struct file *realfile = file->private_data; if (realfile) fput(realfile); return 0; } static int shiftfs_dir_release(struct inode *inode, struct file *file) { return shiftfs_release(inode, file); } static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence) { struct file *realfile = file->private_data; return vfs_llseek(realfile, offset, whence); } static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *realinode = file_inode(file)->i_private; return generic_file_llseek_size(file, offset, whence, realinode->i_sb->s_maxbytes, i_size_read(realinode)); } /* XXX: Need to figure out what to to about atime updates, maybe other * timestamps too ... ref. 
ovl_file_accessed() */ static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb) { int ifl = iocb->ki_flags; rwf_t flags = 0; if (ifl & IOCB_NOWAIT) flags |= RWF_NOWAIT; if (ifl & IOCB_HIPRI) flags |= RWF_HIPRI; if (ifl & IOCB_DSYNC) flags |= RWF_DSYNC; if (ifl & IOCB_SYNC) flags |= RWF_SYNC; return flags; } static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) { struct file *realfile; if (file->f_op->open != shiftfs_open && file->f_op->open != shiftfs_dir_open) return -EINVAL; realfile = file->private_data; lowerfd->flags = 0; lowerfd->file = realfile; /* Did the flags change since open? */ if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) return shiftfs_change_flags(lowerfd->file, file->f_flags); return 0; } static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct fd lowerfd; const struct cred *oldcred; ssize_t ret; if (!iov_iter_count(iter)) return 0; ret = shiftfs_real_fdget(file, &lowerfd); if (ret) return ret; oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos, shiftfs_iocb_to_rwf(iocb)); revert_creds(oldcred); shiftfs_file_accessed(file); fdput(lowerfd); return ret; } static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct fd lowerfd; const struct cred *oldcred; ssize_t ret; if (!iov_iter_count(iter)) return 0; inode_lock(inode); /* Update mode */ shiftfs_copyattr(inode->i_private, inode); ret = file_remove_privs(file); if (ret) goto out_unlock; ret = shiftfs_real_fdget(file, &lowerfd); if (ret) goto out_unlock; oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); file_start_write(lowerfd.file); ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos, shiftfs_iocb_to_rwf(iocb)); file_end_write(lowerfd.file); revert_creds(oldcred); /* Update size */ shiftfs_copyattr(inode->i_private, inode); 
fdput(lowerfd); out_unlock: inode_unlock(inode); return ret; }

/*
 * shiftfs_fsync - flush a byte range of the lower file.
 *
 * Resolves the lower file with shiftfs_real_fdget(), then calls
 * vfs_fsync_range() on it while running with the credentials stored in the
 * superblock info (see shiftfs_override_creds()).
 *
 * Returns the shiftfs_real_fdget() error if the lower file cannot be
 * obtained, otherwise the result of vfs_fsync_range().
 */
static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct fd lowerfd;
	const struct cred *oldcred;
	int ret;

	ret = shiftfs_real_fdget(file, &lowerfd);
	if (ret)
		return ret;

	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
	ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
	revert_creds(oldcred);

	fdput(lowerfd);
	return ret;
}

/*
 * shiftfs_mmap - map the lower file in place of the shiftfs file.
 *
 * The file stored in file->private_data replaces vma->vm_file before the
 * lower ->mmap handler is invoked through call_mmap().
 *
 * Returns -ENODEV if the lower file has no ->mmap operation, -EIO if @vma
 * does not belong to @file, otherwise the result of call_mmap().
 */
static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct file *realfile = file->private_data;
	const struct cred *oldcred;
	int ret;

	if (!realfile->f_op->mmap)
		return -ENODEV;

	if (WARN_ON(file != vma->vm_file))
		return -EIO;

	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
	/* The vma now references the lower file; take a reference for it. */
	vma->vm_file = get_file(realfile);
	ret = call_mmap(vma->vm_file, vma);
	revert_creds(oldcred);

	shiftfs_file_accessed(file);

	if (ret) {
		/*
		 * Drop refcount from new vm_file value and restore original
		 * vm_file value
		 */
		vma->vm_file = file;
		fput(realfile);
	} else {
		/* Drop refcount from previous vm_file value */
		fput(file);
	}

	return ret;
}

/*
 * shiftfs_fallocate - forward fallocate() to the lower file.
 *
 * After the lower call returns (successfully or not) the attributes of the
 * lower inode are copied back to the shiftfs inode via shiftfs_copyattr().
 *
 * Returns the shiftfs_real_fdget() error, or the result of vfs_fallocate().
 */
static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
			      loff_t len)
{
	struct inode *inode = file_inode(file);
	struct inode *loweri = inode->i_private;
	struct fd lowerfd;
	const struct cred *oldcred;
	int ret;

	ret = shiftfs_real_fdget(file, &lowerfd);
	if (ret)
		return ret;

	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
	ret = vfs_fallocate(lowerfd.file, mode, offset, len);
	revert_creds(oldcred);

	/* Update size */
	shiftfs_copyattr(loweri, inode);

	fdput(lowerfd);
	return ret;
}

/*
 * shiftfs_fadvise - forward fadvise() to the lower file.
 *
 * Returns the shiftfs_real_fdget() error, or the result of vfs_fadvise().
 */
static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
			   int advice)
{
	struct fd lowerfd;
	const struct cred *oldcred;
	int ret;

	ret = shiftfs_real_fdget(file, &lowerfd);
	if (ret)
		return ret;

	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
	ret = vfs_fadvise(lowerfd.file, offset, len, advice);
	revert_creds(oldcred);

	fdput(lowerfd);
	return ret;
}

/*
 * shiftfs_override_ioctl_creds - install capability-less credentials for an
 * ioctl that is passed through to the lower filesystem.
 *
 * @cmd:     ioctl command being forwarded
 * @sb:      shiftfs superblock
 * @oldcred: out; credentials that were active on entry, to be handed to
 *           shiftfs_revert_ioctl_creds()
 * @newcred: out; the freshly prepared credentials that are now active
 *
 * The new credentials are prepared while the superblock's stored credentials
 * are active, get the caller's fsuid/fsgid translated from sb->s_user_ns to
 * sbinfo->userns, and have every capability set cleared. The single exception
 * is BTRFS_IOC_SNAP_DESTROY issued by uid 0 of sb->s_user_ns, which keeps
 * CAP_DAC_OVERRIDE in the effective set.
 *
 * Returns 0 on success or -ENOMEM if prepare_creds() fails; in the failure
 * case the original credentials have already been restored.
 */
static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
					const struct cred **oldcred,
					struct cred **newcred)
{
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
	kuid_t fsuid = current_fsuid();
	kgid_t fsgid = current_fsgid();

	*oldcred = shiftfs_override_creds(sb);

	*newcred = prepare_creds();
	if (!*newcred) {
		revert_creds(*oldcred);
		return -ENOMEM;
	}

	(*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
	(*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);

	/* clear all caps to prevent bypassing capable() checks */
	cap_clear((*newcred)->cap_bset);
	cap_clear((*newcred)->cap_effective);
	cap_clear((*newcred)->cap_inheritable);
	cap_clear((*newcred)->cap_permitted);

	if (cmd == BTRFS_IOC_SNAP_DESTROY) {
		kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
		/*
		 * Allow the root user in the container to remove subvolumes
		 * from other users.
		 */
		if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
			cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
	}

	/*
	 * Switch from the stored credentials to the new ones and drop the
	 * reference on the credentials that were replaced.
	 * NOTE(review): presumably balances the reference taken by the first
	 * override_creds() call -- confirm against this kernel's cred API.
	 */
	put_cred(override_creds(*newcred));
	return 0;
}

/*
 * Undo shiftfs_override_ioctl_creds(): restore @oldcred and release @newcred.
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	return shiftfs_revert_object_creds(oldcred, newcred);
}

/*
 * Returns true for the two btrfs snapshot-creation ioctls, i.e. the commands
 * whose argument structure carries a file descriptor that has to be swapped
 * for one referring to the lower file.
 */
static inline bool is_btrfs_snap_ioctl(int cmd)
{
	if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
		return true;

	return false;
}

/*
 * shiftfs_btrfs_ioctl_fd_restore - undo shiftfs_btrfs_ioctl_fd_replace().
 *
 * @cmd: ioctl command; anything but a snapshot-create command is a no-op
 * @fd:  temporary descriptor installed by the replace step
 * @arg: userspace argument buffer
 * @v1:  kernel copy of the v1 arguments (NULL for the v2 command)
 * @v2:  kernel copy of the v2 arguments (NULL for the v1 command)
 *
 * Writes the saved argument structure back to userspace, closes the temporary
 * descriptor and frees both kernel copies.
 *
 * Returns 0 on success, -EFAULT if the copy to userspace fails.
 */
static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
					  struct btrfs_ioctl_vol_args *v1,
					  struct btrfs_ioctl_vol_args_v2 *v2)
{
	int ret;

	if (!is_btrfs_snap_ioctl(cmd))
		return 0;

	if (cmd == BTRFS_IOC_SNAP_CREATE)
		ret = copy_to_user(arg, v1, sizeof(*v1));
	else
		ret = copy_to_user(arg, v2, sizeof(*v2));

	/* Cleanup happens regardless of whether the copy succeeded. */
	__close_fd(current->files, fd);
	kfree(v1);
	kfree(v2);

	return ret ? -EFAULT: 0;
}

/*
 * shiftfs_btrfs_ioctl_fd_replace - swap the fd inside a btrfs snapshot-create
 * argument for a descriptor that refers to the lower file.
 *
 * @cmd:   ioctl command; anything but a snapshot-create command is a no-op
 * @arg:   userspace argument buffer (rewritten in place on success)
 * @b1:    out; kernel copy of the v1 arguments holding the original fd
 * @b2:    out; kernel copy of the v2 arguments holding the original fd
 * @newfd: out; the temporary descriptor installed in the caller's fd table
 *
 * On success the caller owns *b1 / *b2 and *newfd and must release them with
 * shiftfs_btrfs_ioctl_fd_restore(). On failure everything acquired here has
 * already been released and *b1 / *b2 are NULL.
 *
 * Returns 0 on success, the memdup_user() error, -EINVAL for an invalid fd,
 * the shiftfs_real_fdget() or get_unused_fd_flags() error, or -EFAULT if the
 * modified arguments cannot be written back.
 */
static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
					  struct btrfs_ioctl_vol_args **b1,
					  struct btrfs_ioctl_vol_args_v2 **b2,
					  int *newfd)
{
	int oldfd, ret;
	struct fd src;
	struct fd lfd = {};
	struct btrfs_ioctl_vol_args *v1 = NULL;
	struct btrfs_ioctl_vol_args_v2 *v2 = NULL;

	*b1 = NULL;
	*b2 = NULL;

	if (!is_btrfs_snap_ioctl(cmd))
		return 0;

	if (cmd == BTRFS_IOC_SNAP_CREATE) {
		v1 = memdup_user(arg, sizeof(*v1));
		if (IS_ERR(v1))
			return PTR_ERR(v1);
		oldfd = v1->fd;
	} else {
		v2 = memdup_user(arg, sizeof(*v2));
		if (IS_ERR(v2))
			return PTR_ERR(v2);
		oldfd = v2->fd;
	}

	src = fdget(oldfd);
	if (!src.file) {
		ret = -EINVAL;
		goto err_free;
	}

	ret = shiftfs_real_fdget(src.file, &lfd);
	if (ret) {
		fdput(src);
		goto err_free;
	}

	/*
	 * shiftfs_real_fdget() does not take a reference to lfd.file, so
	 * take a reference here to offset the one which will be put by
	 * __close_fd(), and make sure that reference is put on fdput(lfd).
	 */
	get_file(lfd.file);
	lfd.flags |= FDPUT_FPUT;
	fdput(src);

	*newfd = get_unused_fd_flags(lfd.file->f_flags);
	if (*newfd < 0) {
		fdput(lfd);
		ret = *newfd;
		goto err_free;
	}

	/* From here on the fd table owns the reference taken above. */
	fd_install(*newfd, lfd.file);

	/*
	 * Userspace sees the new fd; the kernel copy keeps the original fd so
	 * that the restore step can write it back.
	 */
	if (cmd == BTRFS_IOC_SNAP_CREATE) {
		v1->fd = *newfd;
		ret = copy_to_user(arg, v1, sizeof(*v1));
		v1->fd = oldfd;
	} else {
		v2->fd = *newfd;
		ret = copy_to_user(arg, v2, sizeof(*v2));
		v2->fd = oldfd;
	}

	if (!ret) {
		*b1 = v1;
		*b2 = v2;
	} else {
		/* Closes *newfd and frees v1/v2. */
		shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
		ret = -EFAULT;
	}

	return ret;

err_free:
	kfree(v1);
	kfree(v2);

	return ret;
}

/*
 * shiftfs_real_ioctl - run an ioctl against the lower file.
 *
 * For btrfs snapshot-create commands the fd embedded in the argument is first
 * replaced (see shiftfs_btrfs_ioctl_fd_replace()) and restored afterwards.
 * The ioctl itself runs with the credentials set up by
 * shiftfs_override_ioctl_creds(). Once it has been issued, attributes and
 * inode flags are copied from the lower inode to the shiftfs inode.
 *
 * Returns the first error encountered; if the ioctl itself succeeded, an
 * error from the restore step is returned instead of 0.
 */
static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
			       unsigned long arg)
{
	struct fd lowerfd;
	struct cred *newcred;
	const struct cred *oldcred;
	int newfd = -EBADF;
	long err = 0, ret = 0;
	void __user *argp = (void __user *)arg;
	struct super_block *sb = file->f_path.dentry->d_sb;
	struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
	struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;

	ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
					     &newfd);
	if (ret < 0)
		return ret;

	ret = shiftfs_real_fdget(file, &lowerfd);
	if (ret)
		goto out_restore;

	ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
	if (ret)
		goto out_fdput;

	ret = vfs_ioctl(lowerfd.file, cmd, arg);

	shiftfs_revert_ioctl_creds(oldcred, newcred);

	shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
	shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));

out_fdput:
	fdput(lowerfd);

out_restore:
	err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
					     btrfs_v1, btrfs_v2);
	/* Do not let a restore failure mask an earlier error. */
	if (!ret)
		ret = err;

	return ret;
}

/*
 * in_ioctl_whitelist - decide whether a btrfs ioctl may be passed through.
 *
 * @flag: ioctl command number
 * @arg:  raw ioctl argument (userspace pointer)
 *
 * Returns true for the listed btrfs commands. BTRFS_IOC_SUBVOL_SETFLAGS is
 * accepted only when the flags word read from userspace contains no bit other
 * than BTRFS_SUBVOL_RDONLY.
 */
static bool in_ioctl_whitelist(int flag, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	u64 flags = 0;

	switch (flag) {
	case BTRFS_IOC_FS_INFO:
		return true;
	case BTRFS_IOC_SNAP_CREATE:
		return true;
	case BTRFS_IOC_SNAP_CREATE_V2:
		return true;
	case BTRFS_IOC_SUBVOL_CREATE:
		return true;
	case BTRFS_IOC_SUBVOL_CREATE_V2:
		return true;
	case BTRFS_IOC_SUBVOL_GETFLAGS:
		return true;
	case BTRFS_IOC_SUBVOL_SETFLAGS:
		/*
		 * NOTE(review): the flags are read here only for validation;
		 * the lower handler presumably reads them from userspace
		 * again, so the value could change in between -- confirm.
		 */
		if (copy_from_user(&flags, argp, sizeof(flags)))
			return false;

		if (flags & ~BTRFS_SUBVOL_RDONLY)
			return false;

		return true;
	case BTRFS_IOC_SNAP_DESTROY:
		return true;
	}

	return false;
}

/*
 * shiftfs_ioctl - ->unlocked_ioctl handler.
 *
 * FS_IOC_GETVERSION, FS_IOC_GETFLAGS and FS_IOC_SETFLAGS are always
 * forwarded. Any other command is forwarded only if it is whitelisted and the
 * superblock has ioctl passthrough enabled; otherwise -ENOTTY is returned.
 */
static long shiftfs_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETVERSION:
		/* fall through */
	case FS_IOC_GETFLAGS:
		/* fall through */
	case FS_IOC_SETFLAGS:
		break;
	default:
		if (!in_ioctl_whitelist(cmd, arg) ||
		    !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
			return -ENOTTY;
	}

	return shiftfs_real_ioctl(file, cmd, arg);
}

/*
 * shiftfs_compat_ioctl - ->compat_ioctl handler.
 *
 * Same policy as shiftfs_ioctl() but keyed on the FS_IOC32_* command numbers
 * and returning -ENOIOCTLCMD for rejected commands.
 * NOTE(review): cmd and arg are forwarded unchanged to shiftfs_real_ioctl(),
 * which calls vfs_ioctl(); confirm that no compat translation is needed.
 */
static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	switch (cmd) {
	case FS_IOC32_GETVERSION:
		/* fall through */
	case FS_IOC32_GETFLAGS:
		/* fall through */
	case FS_IOC32_SETFLAGS:
		break;
	default:
		if (!in_ioctl_whitelist(cmd, arg) ||
		    !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
			return -ENOIOCTLCMD;
	}

	return shiftfs_real_ioctl(file, cmd, arg);
}

/* Selects which VFS helper shiftfs_copyfile() dispatches to. */
enum shiftfs_copyop {
	SHIFTFS_COPY,
	SHIFTFS_CLONE,
	SHIFTFS_DEDUPE,
};
static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len, unsigned int flags, enum shiftfs_copyop op) { ssize_t ret; struct fd real_in, real_out; const struct cred *oldcred; struct inode *inode_out = file_inode(file_out); struct inode *loweri = inode_out->i_private; ret = shiftfs_real_fdget(file_out, &real_out); if (ret) return ret; ret = shiftfs_real_fdget(file_in, &real_in); if (ret) { fdput(real_out); return ret; } oldcred = shiftfs_override_creds(inode_out->i_sb); switch (op) { case SHIFTFS_COPY: ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file, pos_out, len, flags); break; case SHIFTFS_CLONE: ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file, pos_out, len, flags); break; case SHIFTFS_DEDUPE: ret = vfs_dedupe_file_range_one(real_in.file, pos_in, real_out.file, pos_out, len, flags); break; } revert_creds(oldcred); /* Update size */ shiftfs_copyattr(loweri, inode_out); fdput(real_in); fdput(real_out); return ret; } static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags, SHIFTFS_COPY); } static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { enum shiftfs_copyop op; if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; if (remap_flags & REMAP_FILE_DEDUP) op = SHIFTFS_DEDUPE; else op = SHIFTFS_CLONE; return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, remap_flags, op); } static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx) { const struct cred *oldcred; int err = -ENOTDIR; struct file *realfile = file->private_data; oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); err = iterate_dir(realfile, ctx); revert_creds(oldcred); return err; } const struct 
file_operations shiftfs_file_operations = { .open = shiftfs_open, .release = shiftfs_release, .llseek = shiftfs_file_llseek, .read_iter = shiftfs_read_iter, .write_iter = shiftfs_write_iter, .fsync = shiftfs_fsync, .mmap = shiftfs_mmap, .fallocate = shiftfs_fallocate, .fadvise = shiftfs_fadvise, .unlocked_ioctl = shiftfs_ioctl, .compat_ioctl = shiftfs_compat_ioctl, .copy_file_range = shiftfs_copy_file_range, .remap_file_range = shiftfs_remap_file_range, }; const struct file_operations shiftfs_dir_operations = { .open = shiftfs_dir_open, .release = shiftfs_dir_release, .compat_ioctl = shiftfs_compat_ioctl, .fsync = shiftfs_fsync, .iterate_shared = shiftfs_iterate_shared, .llseek = shiftfs_dir_llseek, .read = generic_read_dir, .unlocked_ioctl = shiftfs_ioctl, }; static const struct address_space_operations shiftfs_aops = { /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ .direct_IO = noop_direct_IO, }; static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, umode_t mode, dev_t dev, struct dentry *dentry) { struct inode *loweri; inode->i_ino = ino; inode->i_flags |= S_NOCMTIME; mode &= S_IFMT; inode->i_mode = mode; switch (mode & S_IFMT) { case S_IFDIR: inode->i_op = &shiftfs_dir_inode_operations; inode->i_fop = &shiftfs_dir_operations; break; case S_IFLNK: inode->i_op = &shiftfs_symlink_inode_operations; break; case S_IFREG: inode->i_op = &shiftfs_file_inode_operations; inode->i_fop = &shiftfs_file_operations; inode->i_mapping->a_ops = &shiftfs_aops; break; default: inode->i_op = &shiftfs_special_inode_operations; init_special_inode(inode, mode, dev); break; } if (!dentry) return; loweri = dentry->d_inode; if (!loweri->i_op->get_link) inode->i_opflags |= IOP_NOFOLLOW; shiftfs_copyattr(loweri, inode); shiftfs_copyflags(loweri, inode); set_nlink(inode, loweri->i_nlink); } static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct shiftfs_super_info *sbinfo = sb->s_fs_info; 
if (sbinfo->mark) seq_show_option(m, "mark", NULL); if (sbinfo->passthrough) seq_printf(m, ",passthrough=%u", sbinfo->passthrough); return 0; } static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct shiftfs_super_info *sbinfo = sb->s_fs_info; struct dentry *root = sb->s_root; struct dentry *realroot = root->d_fsdata; struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot }; int err; err = vfs_statfs(&realpath, buf); if (err) return err; if (!shiftfs_passthrough_statfs(sbinfo)) buf->f_type = sb->s_magic; return 0; } static void shiftfs_evict_inode(struct inode *inode) { struct inode *loweri = inode->i_private; clear_inode(inode); if (loweri) iput(loweri); } static void shiftfs_put_super(struct super_block *sb) { struct shiftfs_super_info *sbinfo = sb->s_fs_info; if (sbinfo) { mntput(sbinfo->mnt); put_cred(sbinfo->creator_cred); kfree(sbinfo); } } static const struct xattr_handler shiftfs_xattr_handler = { .prefix = "", .get = shiftfs_xattr_get, .set = shiftfs_xattr_set, }; const struct xattr_handler *shiftfs_xattr_handlers[] = { #ifdef CONFIG_SHIFT_FS_POSIX_ACL &shiftfs_posix_acl_access_xattr_handler, &shiftfs_posix_acl_default_xattr_handler, #endif &shiftfs_xattr_handler, NULL }; static inline bool passthrough_is_subset(int old_flags, int new_flags) { if ((new_flags & old_flags) != new_flags) return false; return true; } static int shiftfs_super_check_flags(unsigned long old_flags, unsigned long new_flags) { if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY)) return -EPERM; if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID)) return -EPERM; if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV)) return -EPERM; if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC)) return -EPERM; if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME)) return -EPERM; if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME)) return -EPERM; if (!(old_flags & SB_POSIXACL) && (new_flags & 
SB_POSIXACL)) return -EPERM; return 0; } static int shiftfs_remount(struct super_block *sb, int *flags, char *data) { int err; struct shiftfs_super_info new = {}; struct shiftfs_super_info *info = sb->s_fs_info; err = shiftfs_parse_mount_options(&new, data); if (err) return err; err = shiftfs_super_check_flags(sb->s_flags, *flags); if (err) return err; /* Mark mount option cannot be changed. */ if (info->mark || (info->mark != new.mark)) return -EPERM; if (info->passthrough != new.passthrough) { /* Don't allow exceeding passthrough options of mark mount. */ if (!passthrough_is_subset(info->passthrough_mark, info->passthrough)) return -EPERM; info->passthrough = new.passthrough; } return 0; } static const struct super_operations shiftfs_super_ops = { .put_super = shiftfs_put_super, .show_options = shiftfs_show_options, .statfs = shiftfs_statfs, .remount_fs = shiftfs_remount, .evict_inode = shiftfs_evict_inode, }; struct shiftfs_data { void *data; const char *path; }; static void shiftfs_super_force_flags(struct super_block *sb, unsigned long lower_flags) { sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV | SB_NOEXEC | SB_NOATIME | SB_NODIRATIME); if (!(lower_flags & SB_POSIXACL)) sb->s_flags &= ~SB_POSIXACL; } static int shiftfs_fill_super(struct super_block *sb, void *raw_data, int silent) { int err; struct path path = {}; struct shiftfs_super_info *sbinfo_mp; char *name = NULL; struct inode *inode = NULL; struct dentry *dentry = NULL; struct shiftfs_data *data = raw_data; struct shiftfs_super_info *sbinfo = NULL; if (!data->path) return -EINVAL; sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL); if (!sb->s_fs_info) return -ENOMEM; sbinfo = sb->s_fs_info; err = shiftfs_parse_mount_options(sbinfo, data->data); if (err) return err; /* to mount a mark, must be userns admin */ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; name = kstrdup(data->path, GFP_KERNEL); if (!name) return -ENOMEM; err = kern_path(name, 
LOOKUP_FOLLOW, &path); if (err) goto out_free_name; if (!S_ISDIR(path.dentry->d_inode->i_mode)) { err = -ENOTDIR; goto out_put_path; } sb->s_flags |= SB_POSIXACL; if (sbinfo->mark) { struct cred *cred_tmp; struct super_block *lower_sb = path.mnt->mnt_sb; /* to mark a mount point, must root wrt lower s_user_ns */ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) { err = -EPERM; goto out_put_path; } /* * this part is visible unshifted, so make sure no * executables that could be used to give suid * privileges */ sb->s_iflags = SB_I_NOEXEC; shiftfs_super_force_flags(sb, lower_sb->s_flags); /* * Handle nesting of shiftfs mounts by referring this mark * mount back to the original mark mount. This is more * efficient and alleviates concerns about stack depth. */ if (lower_sb->s_magic == SHIFTFS_MAGIC) { sbinfo_mp = lower_sb->s_fs_info; /* Doesn't make sense to mark a mark mount */ if (sbinfo_mp->mark) { err = -EINVAL; goto out_put_path; } if (!passthrough_is_subset(sbinfo_mp->passthrough, sbinfo->passthrough)) { err = -EPERM; goto out_put_path; } sbinfo->mnt = mntget(sbinfo_mp->mnt); dentry = dget(path.dentry->d_fsdata); /* * Copy up the passthrough mount options from the * parent mark mountpoint. */ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark; sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); } else { sbinfo->mnt = mntget(path.mnt); dentry = dget(path.dentry); /* * For a new mark passthrough_mark and passthrough * are identical. */ sbinfo->passthrough_mark = sbinfo->passthrough; cred_tmp = prepare_creds(); if (!cred_tmp) { err = -ENOMEM; goto out_put_path; } /* Don't override disk quota limits or use reserved space. */ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE); sbinfo->creator_cred = cred_tmp; } } else { /* * This leg executes if we're admin capable in the namespace, * so be very careful. 
*/ err = -EPERM; if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC) goto out_put_path; sbinfo_mp = path.dentry->d_sb->s_fs_info; if (!sbinfo_mp->mark) goto out_put_path; if (!passthrough_is_subset(sbinfo_mp->passthrough, sbinfo->passthrough)) goto out_put_path; sbinfo->mnt = mntget(sbinfo_mp->mnt); sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); dentry = dget(path.dentry->d_fsdata); /* * Copy up passthrough settings from mark mountpoint so we can * verify when the overlay wants to remount with different * passthrough settings. */ sbinfo->passthrough_mark = sbinfo_mp->passthrough; shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags); } sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1; if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n"); err = -EINVAL; goto out_put_path; } inode = new_inode(sb); if (!inode) { err = -ENOMEM; goto out_put_path; } shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry); ihold(dentry->d_inode); inode->i_private = dentry->d_inode; sb->s_magic = SHIFTFS_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &shiftfs_super_ops; sb->s_xattr = shiftfs_xattr_handlers; sb->s_d_op = &shiftfs_dentry_ops; sb->s_root = d_make_root(inode); if (!sb->s_root) { err = -ENOMEM; goto out_put_path; } sb->s_root->d_fsdata = dentry; sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns); shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode); dentry = NULL; err = 0; out_put_path: path_put(&path); out_free_name: kfree(name); dput(dentry); return err; } static struct dentry *shiftfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { struct shiftfs_data d = { data, dev_name }; return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); } static struct file_system_type shiftfs_type = { .owner = THIS_MODULE, .name = "shiftfs", .mount = shiftfs_mount, .kill_sb = kill_anon_super, .fs_flags = FS_USERNS_MOUNT, }; static int __init 
shiftfs_init(void) { return register_filesystem(&shiftfs_type); } static void __exit shiftfs_exit(void) { unregister_filesystem(&shiftfs_type); } MODULE_ALIAS_FS("shiftfs"); MODULE_AUTHOR("James Bottomley"); MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>"); MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>"); MODULE_DESCRIPTION("id shifting filesystem"); MODULE_LICENSE("GPL v2"); module_init(shiftfs_init) module_exit(shiftfs_exit)