diff --git a/book/src/kernel/linux-compatibility/limitations-on-system-calls/file-systems-and-mount-control.md b/book/src/kernel/linux-compatibility/limitations-on-system-calls/file-systems-and-mount-control.md index d3cf6d8c6..367dbee9d 100644 --- a/book/src/kernel/linux-compatibility/limitations-on-system-calls/file-systems-and-mount-control.md +++ b/book/src/kernel/linux-compatibility/limitations-on-system-calls/file-systems-and-mount-control.md @@ -32,31 +32,29 @@ mount( // Create a bind mount mount( source, target, filesystemtype, - mountflags = MS_BIND | MS_REC | MS_MOVE, + mountflags = MS_BIND | MS_REC, data ); ``` -Silently-ignored mount flags: -* `MS_DIRSYNC` -* `MS_LAZYTIME` -* `MS_MANDLOCK` -* `MS_NOATIME` -* `MS_NODEV` -* `MS_NODIRATIME` -* `MS_NOEXEC` -* `MS_NOSUID` -* `MS_RDONLY` -* `MS_RELATIME` -* `MS_SILENT` -* `MS_STRICTATIME` -* `MS_SYNCHRONOUS` - Partially supported mount flags: * `MS_REC` is only effective when used in conjunction with `MS_BIND` +* `MS_REMOUNT` can be used, but the set options have no actual effect. +* `MS_DIRSYNC` can be set but has no actual effect. +* `MS_LAZYTIME` can be set but has no actual effect. +* `MS_MANDLOCK` can be set but has no actual effect. +* `MS_NOATIME` can be set but has no actual effect. +* `MS_NODEV` can be set but has no actual effect. +* `MS_NODIRATIME` can be set but has no actual effect. +* `MS_NOEXEC` can be set but has no actual effect. +* `MS_NOSUID` can be set but has no actual effect. +* `MS_RDONLY` can be set but has no actual effect. +* `MS_RELATIME` can be set but has no actual effect. +* `MS_SILENT` can be set but has no actual effect. +* `MS_STRICTATIME` can be set but has no actual effect. +* `MS_SYNCHRONOUS` can be set but has no actual effect.
Unsupported mount flags: -* `MS_REMOUNT` * `MS_SHARED` * `MS_SLAVE` * `MS_UNBINDABLE` diff --git a/kernel/src/device/mod.rs b/kernel/src/device/mod.rs index 863e2ec72..dcbde6fd1 100644 --- a/kernel/src/device/mod.rs +++ b/kernel/src/device/mod.rs @@ -22,6 +22,7 @@ use crate::{ fs::{ device::{add_node, Device, DeviceId}, fs_resolver::FsPath, + path::PerMountFlags, ramfs::RamFs, }, prelude::*, @@ -34,7 +35,7 @@ pub fn init_in_first_process(ctx: &Context) -> Result<()> { // Mount DevFS let dev_path = fs_resolver.lookup(&FsPath::try_from("/dev")?)?; - dev_path.mount(RamFs::new(), ctx)?; + dev_path.mount(RamFs::new(), PerMountFlags::default(), ctx)?; let null = Arc::new(null::Null); add_node(null, "null", &fs_resolver)?; diff --git a/kernel/src/device/pty/mod.rs b/kernel/src/device/pty/mod.rs index 538277f43..add30160c 100644 --- a/kernel/src/device/pty/mod.rs +++ b/kernel/src/device/pty/mod.rs @@ -4,7 +4,7 @@ use crate::{ fs::{ devpts::DevPts, fs_resolver::{FsPath, FsResolver}, - path::Path, + path::{Path, PerMountFlags}, utils::{mkmod, Inode, InodeType}, }, prelude::*, @@ -23,7 +23,7 @@ pub fn init_in_first_process(fs_resolver: &FsResolver, ctx: &Context) -> Result< let dev = fs_resolver.lookup(&FsPath::try_from("/dev")?)?; // Create the "pts" directory and mount devpts on it. 
let devpts_path = dev.new_fs_child("pts", InodeType::Dir, mkmod!(a+rx, u+w))?; - let devpts_mount = devpts_path.mount(DevPts::new(), ctx)?; + let devpts_mount = devpts_path.mount(DevPts::new(), PerMountFlags::default(), ctx)?; DEV_PTS.call_once(|| Path::new_fs_root(devpts_mount)); diff --git a/kernel/src/device/shm.rs b/kernel/src/device/shm.rs index 224562a0a..440573ae6 100644 --- a/kernel/src/device/shm.rs +++ b/kernel/src/device/shm.rs @@ -3,6 +3,7 @@ use crate::{ fs::{ fs_resolver::{FsPath, FsResolver}, + path::PerMountFlags, ramfs::RamFs, utils::{chmod, InodeType}, }, @@ -16,7 +17,7 @@ pub fn init_in_first_process(fs_resolver: &FsResolver, ctx: &Context) -> Result< // Create the "shm" directory under "/dev" and mount a ramfs on it. let shm_path = dev_path.new_fs_child("shm", InodeType::Dir, chmod!(InodeMode::S_ISVTX, a+rwx))?; - shm_path.mount(RamFs::new(), ctx)?; + shm_path.mount(RamFs::new(), PerMountFlags::default(), ctx)?; log::debug!("Mount RamFs at \"/dev/shm\""); Ok(()) } diff --git a/kernel/src/fs/devpts/mod.rs b/kernel/src/fs/devpts/mod.rs index 62afad298..68e5e9116 100644 --- a/kernel/src/fs/devpts/mod.rs +++ b/kernel/src/fs/devpts/mod.rs @@ -103,10 +103,6 @@ impl FileSystem for DevPts { fn sb(&self) -> SuperBlock { self.sb.clone() } - - fn flags(&self) -> FsFlags { - FsFlags::empty() - } } struct DevPtsType; @@ -122,6 +118,7 @@ impl FsType for DevPtsType { fn create( &self, + _flags: FsFlags, _args: Option, _disk: Option>, ) -> Result> { diff --git a/kernel/src/fs/path/mod.rs b/kernel/src/fs/path/mod.rs index 5a47cd0af..5cb110567 100644 --- a/kernel/src/fs/path/mod.rs +++ b/kernel/src/fs/path/mod.rs @@ -5,7 +5,7 @@ use core::time::Duration; use inherit_methods_macro::inherit_methods; -pub use mount::{Mount, MountPropType}; +pub use mount::{Mount, MountPropType, PerMountFlags}; pub use mount_namespace::MountNamespace; use crate::{ @@ -13,8 +13,8 @@ use crate::{ inode_handle::InodeHandle, path::dentry::{Dentry, DentryKey}, utils::{ - CreationFlags, 
FileSystem, Inode, InodeMode, InodeType, Metadata, MknodType, OpenArgs, - Permission, StatusFlags, XattrName, XattrNamespace, XattrSetFlags, NAME_MAX, + CreationFlags, FileSystem, FsFlags, Inode, InodeMode, InodeType, Metadata, MknodType, + OpenArgs, Permission, StatusFlags, XattrName, XattrNamespace, XattrSetFlags, NAME_MAX, }, }, prelude::*, @@ -205,7 +205,12 @@ impl Path { /// Returns `ENOTDIR` if the path is not a directory. /// Returns `EINVAL` if attempting to mount on root or if the path is not /// in the current mount namespace. - pub fn mount(&self, fs: Arc, ctx: &Context) -> Result> { + pub fn mount( + &self, + fs: Arc, + flags: PerMountFlags, + ctx: &Context, + ) -> Result> { if self.type_() != InodeType::Dir { return_errno_with_message!(Errno::ENOTDIR, "the path is not a directory"); } @@ -220,7 +225,7 @@ impl Path { return_errno_with_message!(Errno::EINVAL, "the path is not in this mount namespace"); } - let child_mount = self.mount.do_mount(fs, &self.dentry)?; + let child_mount = self.mount.do_mount(fs, flags, &self.dentry)?; Ok(child_mount) } @@ -256,6 +261,36 @@ impl Path { Ok(child_mount) } + /// Remounts the filesystem with new `PerMountFlags` and optionally new `FsFlags`. + /// + /// If `fs_flags` is provided, it will update the flags of the mounted filesystem, + /// otherwise, only the flags of the current mount will be updated. + /// + /// # Errors + /// + /// Returns `EINVAL` in the following cases: + /// - The current path is not a mount root. + /// - The current path is not in the current mount namespace. 
+ pub fn remount( + &self, + mount_flags: PerMountFlags, + fs_flags: Option, + data: Option, + ctx: &Context, + ) -> Result<()> { + if !self.is_mount_root() { + return_errno_with_message!(Errno::EINVAL, "the path is not a mount root"); + }; + + let current_ns_proxy = ctx.thread_local.borrow_ns_proxy(); + let current_mnt_ns = current_ns_proxy.unwrap().mnt_ns(); + if !current_mnt_ns.owns(&self.mount) { + return_errno_with_message!(Errno::EINVAL, "the path is not in this mount namespace"); + } + + self.mount.remount(mount_flags, fs_flags, data, ctx) + } + /// Creates a bind mount from the current path to the destination path. /// /// Creates a new mount tree that mirrors either the root mount (non-recursive) diff --git a/kernel/src/fs/path/mount.rs b/kernel/src/fs/path/mount.rs index ad9408e89..a91b25270 100644 --- a/kernel/src/fs/path/mount.rs +++ b/kernel/src/fs/path/mount.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: MPL-2.0 +use core::sync::atomic::{AtomicU32, Ordering}; + use aster_util::printer::VmPrinter; +use atomic_integer_wrapper::define_atomic_version_of_integer_like_type; use hashbrown::HashMap; use id_alloc::IdAlloc; use spin::Once; @@ -12,7 +15,7 @@ use crate::{ mount_namespace::MountNamespace, Path, }, - utils::{FileSystem, InodeType}, + utils::{FileSystem, FsFlags, InodeType}, }, prelude::*, }; @@ -44,6 +47,103 @@ pub(super) fn init() { ID_ALLOCATOR.call_once(|| SpinLock::new(IdAlloc::with_capacity(MAX_MOUNT_NUM))); } +bitflags! { + pub struct PerMountFlags: u32 { + /// Mount read-only. + const RDONLY = 1 << 0; + /// Ignore suid and sgid bits. + const NOSUID = 1 << 1; + /// Disallow access to device special files. + const NODEV = 1 << 2; + /// Disallow program execution. + const NOEXEC = 1 << 3; + /// Do not update access times. + const NOATIME = 1 << 10; + /// Do not update directory access times. + const NODIRATIME = 1 << 11; + /// Update atime relative to mtime/ctime. + const RELATIME = 1 << 21; + /// Always perform atime updates. 
+ const STRICTATIME = 1 << 24; + } +} + +impl Default for PerMountFlags { + fn default() -> Self { + let empty = Self::empty(); + empty | Self::RELATIME + } +} + +impl PerMountFlags { + /// Gets the atime policy. + fn atime_policy(&self) -> AtimePolicy { + if self.contains(PerMountFlags::STRICTATIME) { + AtimePolicy::Strictatime + } else if self.contains(PerMountFlags::NOATIME) { + AtimePolicy::Noatime + } else { + AtimePolicy::Relatime + } + } +} + +/// The policy for updating access times (atime). +/// +/// A Mount can only have one of the following atime policies. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum AtimePolicy { + Relatime, + Noatime, + Strictatime, +} + +impl core::fmt::Display for PerMountFlags { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if self.contains(PerMountFlags::RDONLY) { + write!(f, "ro")?; + } else { + write!(f, "rw")?; + }; + if self.contains(PerMountFlags::NOSUID) { + write!(f, ",nosuid")?; + } + if self.contains(PerMountFlags::NODEV) { + write!(f, ",nodev")?; + } + if self.contains(PerMountFlags::NOEXEC) { + write!(f, ",noexec")?; + } + if self.contains(PerMountFlags::NODIRATIME) { + write!(f, ",nodiratime")?; + } + let atime_policy = match self.atime_policy() { + AtimePolicy::Relatime => "relatime", + AtimePolicy::Noatime => "noatime", + AtimePolicy::Strictatime => "strictatime", + }; + write!(f, ",{}", atime_policy) + } +} + +impl From for PerMountFlags { + fn from(value: u32) -> Self { + Self::from_bits_truncate(value) + } +} + +impl From for u32 { + fn from(value: PerMountFlags) -> Self { + value.bits() + } +} + +define_atomic_version_of_integer_like_type!(PerMountFlags, { + /// An atomic version of `PerMountFlags`. + #[derive(Debug)] + pub struct AtomicPerMountFlags(AtomicU32); +}); + /// A `Mount` represents a mounted filesystem instance in the VFS. 
/// /// Each `Mount` can be viewed as a node in the mount tree, maintaining @@ -66,6 +166,8 @@ pub struct Mount { mnt_ns: Weak, /// Propagation type of this mount (e.g., private, shared). propagation: RwLock, + /// The flags of this mount. + flags: AtomicPerMountFlags, /// Reference to self. this: Weak, } @@ -82,7 +184,7 @@ impl Mount { fs: Arc, mnt_ns: Weak, ) -> Arc { - Self::new(fs, None, mnt_ns) + Self::new(fs, PerMountFlags::default(), None, mnt_ns) } /// The internal constructor. @@ -95,6 +197,7 @@ impl Mount { /// mount nodes must be explicitly assigned a mountpoint to maintain structural integrity. fn new( fs: Arc, + flags: PerMountFlags, parent_mount: Option>, mnt_ns: Weak, ) -> Arc { @@ -108,6 +211,7 @@ impl Mount { propagation: RwLock::new(MountPropType::default()), fs, mnt_ns, + flags: AtomicPerMountFlags::new(flags), this: weak_self.clone(), }) } @@ -131,6 +235,7 @@ impl Mount { pub(super) fn do_mount( self: &Arc, fs: Arc, + flags: PerMountFlags, mountpoint: &Arc, ) -> Result> { if mountpoint.type_() != InodeType::Dir { @@ -138,7 +243,7 @@ impl Mount { } let key = mountpoint.key(); - let child_mount = Self::new(fs, Some(Arc::downgrade(self)), self.mnt_ns.clone()); + let child_mount = Self::new(fs, flags, Some(Arc::downgrade(self)), self.mnt_ns.clone()); self.children.write().insert(key, child_mount.clone()); child_mount.set_mountpoint(mountpoint); @@ -181,6 +286,7 @@ impl Mount { propagation: RwLock::new(MountPropType::default()), fs: self.fs.clone(), mnt_ns: new_ns.cloned().unwrap_or_else(|| self.mnt_ns.clone()), + flags: AtomicPerMountFlags::new(self.flags.load(Ordering::Relaxed)), this: weak_self.clone(), }) } @@ -324,6 +430,45 @@ impl Mount { Ok(()) } + pub(super) fn remount( + &self, + mount_flags: PerMountFlags, + fs_flags: Option, + data: Option, + ctx: &Context, + ) -> Result<()> { + // TODO: This lock is a workaround to guarantee the atomicity of remount operation. 
+ // We need to re-design the lock mechanism of `Mount` and file system in the future. + static REMOUNT_LOCK: Mutex<()> = Mutex::new(()); + + let _guard = REMOUNT_LOCK.lock(); + + if let Some(flags) = fs_flags { + self.fs.set_fs_flags(flags, data, ctx)?; + } + + // The logic here is consistent with Linux. + // In Linux, `NOATIME`, `RELATIME`, and `STRICTATIME` are mutually exclusive. + // If none of them nor `NODIRATIME` is set, all atime-related flags (including + // `NODIRATIME`, as in Linux's `MNT_ATIME_MASK`) are inherited from the old flags. + // Reference: https://elixir.bootlin.com/linux/v6.17/source/fs/namespace.c#L4097 + const ATIME_MASK: PerMountFlags = PerMountFlags::NOATIME + .union(PerMountFlags::NODIRATIME) + .union(PerMountFlags::RELATIME.union(PerMountFlags::STRICTATIME)); + + let need_inherit_atime = !mount_flags.intersects(ATIME_MASK); + + if need_inherit_atime { + let old_flags = self.flags.load(Ordering::Relaxed); + let new_flags = mount_flags | (old_flags & ATIME_MASK); + self.flags.store(new_flags, Ordering::Relaxed); + } else { + self.flags.store(mount_flags, Ordering::Relaxed); + } + + Ok(()) + } + /// Gets the parent mount node if any. pub(super) fn parent(&self) -> Option> { self.parent.read().as_ref().cloned() @@ -400,14 +545,14 @@ impl Mount { // No parent means it's the root of the namespace. "/".to_string() }; + let mount_flags = mount.flags.load(Ordering::Relaxed); let fs_type = mount.fs().name(); + let fs_flags = mount.fs().flags(); // The following fields are dummy for now. let major = 0; let minor = 0; - let mount_options = "rw,relatime"; let source = "none"; - let super_options = "rw"; let entry = MountInfoEntry { mount_id, @@ -416,10 +561,10 @@ impl Mount { minor, root: &root, mount_point: &mount_point, - mount_options, + mount_flags, fs_type, source, - super_options, + fs_flags, }; writeln!(printer, "{}", entry)?; @@ -468,14 +613,14 @@ struct MountInfoEntry<'a> { root: &'a str, /// The mount point relative to the process's root directory. mount_point: &'a str, - /// Per-mount options.
- mount_options: &'a str, + /// Per-mount flags. + mount_flags: PerMountFlags, /// The type of the filesystem in the form "type[.subtype]". fs_type: &'a str, /// Filesystem-specific information or "none". source: &'a str, - /// Per-superblock options. - super_options: &'a str, + /// Per-filesystem flags. + fs_flags: FsFlags, } impl core::fmt::Display for MountInfoEntry<'_> { @@ -489,10 +634,10 @@ impl core::fmt::Display for MountInfoEntry<'_> { self.minor, &self.root, &self.mount_point, - &self.mount_options, + &self.mount_flags, &self.fs_type, &self.source, - &self.super_options + &self.fs_flags, ) } } diff --git a/kernel/src/fs/rootfs.rs b/kernel/src/fs/rootfs.rs index 430eb4c99..5a6d67c2f 100644 --- a/kernel/src/fs/rootfs.rs +++ b/kernel/src/fs/rootfs.rs @@ -10,7 +10,10 @@ use super::{ fs_resolver::{FsPath, FsResolver}, utils::{FileSystem, InodeMode, InodeType}, }; -use crate::{fs::path::is_dot, prelude::*}; +use crate::{ + fs::path::{is_dot, PerMountFlags}, + prelude::*, +}; struct BoxedReader<'a>(Box); @@ -114,6 +117,6 @@ pub fn mount_fs_at( ctx: &Context, ) -> Result<()> { let target_path = fs_resolver.lookup(fs_path)?; - target_path.mount(fs, ctx)?; + target_path.mount(fs, PerMountFlags::default(), ctx)?; Ok(()) } diff --git a/kernel/src/syscall/mount.rs b/kernel/src/syscall/mount.rs index 99e3024bc..5f5accf93 100644 --- a/kernel/src/syscall/mount.rs +++ b/kernel/src/syscall/mount.rs @@ -4,23 +4,23 @@ use super::SyscallReturn; use crate::{ fs::{ fs_resolver::{FsPath, AT_FDCWD}, - path::{MountPropType, Path}, + path::{MountPropType, Path, PerMountFlags}, registry::FsProperties, - utils::{FileSystem, InodeType}, + utils::{FileSystem, FsFlags, InodeType}, }, prelude::*, syscall::constants::MAX_FILENAME_LEN, }; -/// The `data` argument is interpreted by the different filesystems. -/// Typically it is a string of comma-separated options understood by -/// this filesystem. The current implementation only considers the case -/// where it is `NULL`. 
Because it should be interpreted by the specific filesystems. pub fn sys_mount( devname_addr: Vaddr, dirname_addr: Vaddr, fstype_addr: Vaddr, flags: u64, + // The `data` argument is interpreted by the different filesystems. + // Typically it is a string of comma-separated options understood by + // the target filesystem. The current implementation only considers the case + // where it is `NULL`, because it should be interpreted by the specific filesystems. data: Vaddr, ctx: &Context, ) -> Result { @@ -44,9 +44,10 @@ pub fn sys_mount( }; if mount_flags.contains(MountFlags::MS_REMOUNT) && mount_flags.contains(MountFlags::MS_BIND) { - do_reconfigure_mnt()?; + // If `MS_BIND` is specified, only the mount flags are changed. + do_remount_mnt(&dst_path, mount_flags, ctx)?; } else if mount_flags.contains(MountFlags::MS_REMOUNT) { - do_remount()?; + do_remount_mnt_and_fs(&dst_path, mount_flags, data, ctx)?; } else if mount_flags.contains(MountFlags::MS_BIND) { do_bind_mount( devname, @@ -59,21 +60,33 @@ pub fn sys_mount( } else if mount_flags.contains(MountFlags::MS_MOVE) { do_move_mount_old(devname, dst_path, ctx)?; } else { - do_new_mount(devname, fstype_addr, dst_path, data, ctx)?; + do_new_mount(devname, mount_flags, fstype_addr, dst_path, data, ctx)?; } Ok(SyscallReturn::Return(0)) } -fn do_reconfigure_mnt() -> Result<()> { - return_errno_with_message!(Errno::EINVAL, "do_reconfigure_mnt is not supported"); +/// Remounts the mount with new flags. +fn do_remount_mnt(path: &Path, flags: MountFlags, ctx: &Context) -> Result<()> { + let per_mount_flags = PerMountFlags::from(flags); + + path.remount(per_mount_flags, None, None, ctx) } -fn do_remount() -> Result<()> { - return_errno_with_message!(Errno::EINVAL, "do_remount is not supported"); +/// Remounts the filesystem with new flags and data.
+fn do_remount_mnt_and_fs(path: &Path, flags: MountFlags, data: Vaddr, ctx: &Context) -> Result<()> { + let per_mount_flags = PerMountFlags::from(flags); + let fs_flags = FsFlags::from(flags); + let data = if data == 0 { + None + } else { + Some(ctx.user_space().read_cstring(data, MAX_FILENAME_LEN)?) + }; + + path.remount(per_mount_flags, Some(fs_flags), data, ctx) } -/// Bind a mount to a dst location. +/// Binds a mount to a dst location. /// /// If recursive is true, then bind the mount recursively. /// Such as use user command `mount --rbind src dst`. @@ -125,7 +138,7 @@ fn do_change_type(target_path: Path, flags: MountFlags, ctx: &Context) -> Result } } -/// Move a mount from src location to dst location. +/// Moves a mount from src location to dst location. fn do_move_mount_old(src_name: CString, dst_path: Path, ctx: &Context) -> Result<()> { let src_path = { let src_name = src_name.to_string_lossy(); @@ -160,7 +173,7 @@ fn do_new_mount( return_errno_with_message!(Errno::EINVAL, "fs_type is empty"); } let fs = get_fs(devname, flags, fs_type, data, ctx)?; - target_path.mount(fs, ctx)?; + target_path.mount(fs, flags.into(), ctx)?; Ok(()) } @@ -225,6 +238,13 @@ bitflags! { const MS_LAZYTIME = 1 << 25; // Update the on-disk [acm]times lazily. } } + +impl From for PerMountFlags { + fn from(flags: MountFlags) -> Self { + Self::from_bits_truncate(flags.bits()) + } +} + impl From for FsFlags { fn from(flags: MountFlags) -> Self { Self::from_bits_truncate(flags.bits())