Support CLONE_NEWNS for related syscalls

This commit is contained in:
Chen Chengjun 2025-09-02 07:07:11 +00:00 committed by Tate, Hongliang Tian
parent bacbe58ff3
commit c56089309b
6 changed files with 73 additions and 2 deletions

View File

@ -120,6 +120,13 @@ impl CloneArgs {
) -> Result<Self> {
const FLAG_MASK: u64 = 0xff;
let flags = CloneFlags::from(raw_flags & !FLAG_MASK);
if flags.contains(CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS) {
return_errno_with_message!(
Errno::EINVAL,
"CLONE_NEWNS cannot be used with CLONE_FS for clone syscall"
);
}
let exit_signal = raw_flags & FLAG_MASK;
// Disambiguate the `parent_tid` parameter. The field is used
// both for `CLONE_PIDFD` and `CLONE_PARENT_SETTID`, so at
@ -188,7 +195,8 @@ impl CloneFlags {
| CloneFlags::CLONE_PARENT_SETTID
| CloneFlags::CLONE_CHILD_SETTID
| CloneFlags::CLONE_CHILD_CLEARTID
| CloneFlags::CLONE_VFORK;
| CloneFlags::CLONE_VFORK
| CloneFlags::CLONE_NEWNS;
let unsupported_flags = *self - supported_flags;
if !unsupported_flags.is_empty() {
warn!("contains unsupported clone flags: {:?}", unsupported_flags);
@ -291,6 +299,13 @@ fn clone_child_task(
posix_thread,
)?;
if clone_flags.contains(CloneFlags::CLONE_NEWNS) {
child_fs
.resolver()
.write()
.switch_to_mnt_ns(child_ns_proxy.mnt_ns())?;
}
let child_user_ctx = Box::new(clone_user_ctx(
parent_context,
clone_args.stack,
@ -392,6 +407,13 @@ fn clone_child_process(
posix_thread,
)?;
if clone_flags.contains(CloneFlags::CLONE_NEWNS) {
child_fs
.resolver()
.write()
.switch_to_mnt_ns(child_ns_proxy.mnt_ns())?;
}
// Inherit the parent's signal mask
let child_sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into();

View File

@ -169,6 +169,8 @@ impl ContextSetNsAdminApi for Context<'_> {
// TODO: When setting a specific namespace,
// other dependent fields of a posix thread may also need to be updated.
*self.thread_local.borrow_fs().resolver().write() = ns_proxy.mnt_ns.new_fs_resolver();
*pthread_ns_proxy = Some(ns_proxy.clone());
thread_local_ns_proxy.replace(Some(ns_proxy));
}

View File

@ -58,6 +58,15 @@ impl ContextUnshareAdminApi for Context<'_> {
let new_ns_proxy =
thread_local_ns_proxy.new_clone(&user_ns_ref, flags, self.posix_thread)?;
if flags.contains(CloneFlags::CLONE_NEWNS) {
self.thread_local
.borrow_fs()
.resolver()
.write()
.switch_to_mnt_ns(new_ns_proxy.mnt_ns())
.unwrap();
}
*pthread_ns_proxy = Some(new_ns_proxy.clone());
*thread_local_ns_proxy = new_ns_proxy;

View File

@ -151,6 +151,10 @@ impl ThreadLocal {
self.fs.borrow_mut()
}
/// Reports whether the filesystem information held in `self.fs` has more
/// than one strong owner.
///
/// The answer is derived from the `Arc` strong count at the moment of the
/// call, so it is only a snapshot.
// NOTE(review): presumably a count above one means another thread shares
// this fs info (e.g. via `CLONE_FS`) — confirm against the places that
// clone this `Arc`.
pub fn is_fs_shared(&self) -> bool {
    let fs = self.fs.borrow();
    let owners = Arc::strong_count(&fs);
    owners > 1
}
/// Returns a shared reference to the `sig_context` cell, which stores an
/// optional `Vaddr`.
// NOTE(review): presumably the user-space address of the saved signal
// context — confirm against where this cell is written.
pub fn sig_context(&self) -> &Cell<Option<Vaddr>> {
&self.sig_context
}

View File

@ -11,7 +11,7 @@
//! 2. A `PidFile` opened by `pidfd_open` or by opening `/proc/[pid]` directory.
use crate::{
fs::file_table::FileDesc,
fs::{file_table::FileDesc, path::MountNamespace},
net::UtsNamespace,
prelude::*,
process::{
@ -85,6 +85,18 @@ fn build_proxy_from_pid_file(
set_uts_ns(&mut builder, target_ns, ctx)?;
}
if flags.contains(CloneFlags::CLONE_NEWNS) {
if ctx.thread_local.is_fs_shared() {
return_errno_with_message!(
Errno::EINVAL,
"cannot change mount namespace with shared filesystem"
);
}
let target_ns = target_proxy.mnt_ns();
set_mnt_ns(&mut builder, target_ns, ctx)?;
}
// TODO: Support setting other namespaces from the target process.
Ok(builder.build())
@ -110,3 +122,24 @@ fn set_uts_ns(
Ok(())
}
/// Records `target_ns` as the mount namespace of the proxy under
/// construction, after checking the caller's privileges.
///
/// # Errors
///
/// Propagates the error from either `check_cap` call when the calling
/// thread lacks `SYS_ADMIN` in the user namespace that owns `target_ns`
/// or in its own current user namespace.
fn set_mnt_ns(
    builder: &mut NsProxyBuilder,
    target_ns: &Arc<MountNamespace>,
    ctx: &Context,
) -> Result<()> {
    let caller = ctx.posix_thread;

    // First gate: `SYS_ADMIN` in the user namespace owning the target
    // mount namespace.
    let target_owner = target_ns.owner();
    target_owner.check_cap(CapSet::SYS_ADMIN, caller)?;

    // Second gate: `SYS_ADMIN` in the caller's current user namespace.
    // The borrow is kept inside this single statement so that it is
    // released before the builder is touched.
    ctx.thread_local
        .borrow_user_ns()
        .check_cap(CapSet::SYS_ADMIN, caller)?;

    // TODO: Are the checks above sufficient?
    // NOTE(review): setns(2) additionally lists CAP_SYS_CHROOT for joining
    // a mount namespace — confirm whether it should be checked here.

    builder.mnt_ns(Arc::clone(target_ns));
    Ok(())
}

View File

@ -50,6 +50,7 @@ fn apply_implied_flags(flags: &mut CloneFlags) {
fn check_flags(flags: CloneFlags, ctx: &Context) -> Result<()> {
const VALID_FLAGS: CloneFlags = CloneFlags::CLONE_NS_FLAGS
.union(CloneFlags::CLONE_NEWNS)
.union(CloneFlags::CLONE_FILES)
.union(CloneFlags::CLONE_FS)
.union(CloneFlags::CLONE_SYSVSEM)