diff --git a/kernel/src/process/clone.rs b/kernel/src/process/clone.rs index d27032e17..fd26acbe1 100644 --- a/kernel/src/process/clone.rs +++ b/kernel/src/process/clone.rs @@ -120,6 +120,13 @@ impl CloneArgs { ) -> Result { const FLAG_MASK: u64 = 0xff; let flags = CloneFlags::from(raw_flags & !FLAG_MASK); + if flags.contains(CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS) { + return_errno_with_message!( + Errno::EINVAL, + "CLONE_NEWNS cannot be used with CLONE_FS for clone syscall" + ); + } + let exit_signal = raw_flags & FLAG_MASK; // Disambiguate the `parent_tid` parameter. The field is used // both for `CLONE_PIDFD` and `CLONE_PARENT_SETTID`, so at @@ -188,7 +195,8 @@ impl CloneFlags { | CloneFlags::CLONE_PARENT_SETTID | CloneFlags::CLONE_CHILD_SETTID | CloneFlags::CLONE_CHILD_CLEARTID - | CloneFlags::CLONE_VFORK; + | CloneFlags::CLONE_VFORK + | CloneFlags::CLONE_NEWNS; let unsupported_flags = *self - supported_flags; if !unsupported_flags.is_empty() { warn!("contains unsupported clone flags: {:?}", unsupported_flags); @@ -291,6 +299,13 @@ fn clone_child_task( posix_thread, )?; + if clone_flags.contains(CloneFlags::CLONE_NEWNS) { + child_fs + .resolver() + .write() + .switch_to_mnt_ns(child_ns_proxy.mnt_ns())?; + } + let child_user_ctx = Box::new(clone_user_ctx( parent_context, clone_args.stack, @@ -392,6 +407,13 @@ fn clone_child_process( posix_thread, )?; + if clone_flags.contains(CloneFlags::CLONE_NEWNS) { + child_fs + .resolver() + .write() + .switch_to_mnt_ns(child_ns_proxy.mnt_ns())?; + } + // Inherit the parent's signal mask let child_sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into(); diff --git a/kernel/src/process/namespace/nsproxy.rs b/kernel/src/process/namespace/nsproxy.rs index 6b1c59a39..d55c2d8d3 100644 --- a/kernel/src/process/namespace/nsproxy.rs +++ b/kernel/src/process/namespace/nsproxy.rs @@ -169,6 +169,8 @@ impl ContextSetNsAdminApi for Context<'_> { // TODO: When setting a specific namespace, // other dependent fields of a 
posix thread may also need to be updated. + *self.thread_local.borrow_fs().resolver().write() = ns_proxy.mnt_ns.new_fs_resolver(); + *pthread_ns_proxy = Some(ns_proxy.clone()); thread_local_ns_proxy.replace(Some(ns_proxy)); } diff --git a/kernel/src/process/namespace/unshare.rs b/kernel/src/process/namespace/unshare.rs index b28809fb5..beaca65b2 100644 --- a/kernel/src/process/namespace/unshare.rs +++ b/kernel/src/process/namespace/unshare.rs @@ -58,6 +58,15 @@ impl ContextUnshareAdminApi for Context<'_> { let new_ns_proxy = thread_local_ns_proxy.new_clone(&user_ns_ref, flags, self.posix_thread)?; + if flags.contains(CloneFlags::CLONE_NEWNS) { + // Propagate a failed mount-namespace switch instead of panicking; the namespace proxies are only updated below. + self.thread_local + .borrow_fs() + .resolver() + .write() + .switch_to_mnt_ns(new_ns_proxy.mnt_ns())?; + } + *pthread_ns_proxy = Some(new_ns_proxy.clone()); *thread_local_ns_proxy = new_ns_proxy; diff --git a/kernel/src/process/posix_thread/thread_local.rs b/kernel/src/process/posix_thread/thread_local.rs index 12c7433f8..7072eba5d 100644 --- a/kernel/src/process/posix_thread/thread_local.rs +++ b/kernel/src/process/posix_thread/thread_local.rs @@ -151,6 +151,10 @@ impl ThreadLocal { self.fs.borrow_mut() } + pub fn is_fs_shared(&self) -> bool { + Arc::strong_count(&self.fs.borrow()) > 1 + } + pub fn sig_context(&self) -> &Cell> { &self.sig_context } diff --git a/kernel/src/syscall/setns.rs b/kernel/src/syscall/setns.rs index 1304c0722..fa5c35b44 100644 --- a/kernel/src/syscall/setns.rs +++ b/kernel/src/syscall/setns.rs @@ -11,7 +11,7 @@ //! 2. A `PidFile` opened by `pidfd_open` or by opening `/proc/[pid]` directory. 
use crate::{ - fs::file_table::FileDesc, + fs::{file_table::FileDesc, path::MountNamespace}, net::UtsNamespace, prelude::*, process::{ @@ -85,6 +85,18 @@ fn build_proxy_from_pid_file( set_uts_ns(&mut builder, target_ns, ctx)?; } + if flags.contains(CloneFlags::CLONE_NEWNS) { + if ctx.thread_local.is_fs_shared() { + return_errno_with_message!( + Errno::EINVAL, + "cannot change mount namespace with shared filesystem" + ); + } + + let target_ns = target_proxy.mnt_ns(); + set_mnt_ns(&mut builder, target_ns, ctx)?; + } + // TODO: Support setting other namespaces from the target process. Ok(builder.build()) @@ -110,3 +122,24 @@ fn set_uts_ns( Ok(()) } + +fn set_mnt_ns( + builder: &mut NsProxyBuilder, + target_ns: &Arc, + ctx: &Context, +) -> Result<()> { + // Verify the thread has SYS_ADMIN capability in the target namespace's owner + // and the current user namespace. + target_ns + .owner() + .check_cap(CapSet::SYS_ADMIN, ctx.posix_thread)?; + ctx.thread_local + .borrow_user_ns() + .check_cap(CapSet::SYS_ADMIN, ctx.posix_thread)?; + + // TODO: Linux's setns(2) additionally requires CAP_SYS_CHROOT in the caller's user namespace; add that check. + + builder.mnt_ns(target_ns.clone()); + + Ok(()) +} diff --git a/kernel/src/syscall/unshare.rs b/kernel/src/syscall/unshare.rs index 744e3d9c7..22477af4a 100644 --- a/kernel/src/syscall/unshare.rs +++ b/kernel/src/syscall/unshare.rs @@ -50,6 +50,7 @@ fn apply_implied_flags(flags: &mut CloneFlags) { fn check_flags(flags: CloneFlags, ctx: &Context) -> Result<()> { const VALID_FLAGS: CloneFlags = CloneFlags::CLONE_NS_FLAGS + .union(CloneFlags::CLONE_NEWNS) .union(CloneFlags::CLONE_FILES) .union(CloneFlags::CLONE_FS) .union(CloneFlags::CLONE_SYSVSEM)