diff --git a/book/src/kernel/linux-compatibility/README.md b/book/src/kernel/linux-compatibility/README.md index 6267e7b43..d35e9ccbf 100644 --- a/book/src/kernel/linux-compatibility/README.md +++ b/book/src/kernel/linux-compatibility/README.md @@ -328,7 +328,7 @@ provided by Linux on x86-64 architecture. | 305 | clock_adjtime | ❌ | | | 306 | syncfs | ❌ | | | 307 | sendmmsg | ❌ | | -| 308 | setns | ❌ | | +| 308 | setns | ✅ | | | 309 | getcpu | ✅ | | | 310 | process_vm_readv | ❌ | | | 311 | process_vm_writev | ❌ | | diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index e3de82341..2692328c7 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -25,7 +25,7 @@ pub use clone::{clone_child, CloneArgs, CloneFlags}; pub use credentials::{Credentials, Gid, Uid}; pub use kill::{kill, kill_all, kill_group, tgkill}; pub use namespace::{ - nsproxy::{check_unsupported_ns_flags, ContextNsAdminApi, NsProxy, NsProxyBuilder}, + nsproxy::{check_unsupported_ns_flags, ContextSetNsAdminApi, NsProxy, NsProxyBuilder}, unshare::ContextUnshareAdminApi, user_ns::UserNamespace, }; diff --git a/kernel/src/process/namespace/nsproxy.rs b/kernel/src/process/namespace/nsproxy.rs index 034cde294..6803a9e24 100644 --- a/kernel/src/process/namespace/nsproxy.rs +++ b/kernel/src/process/namespace/nsproxy.rs @@ -128,3 +128,22 @@ pub fn check_unsupported_ns_flags(flags: CloneFlags) -> Result<()> { warn!("unsupported clone ns flags: {:?}", unsupported_flags); return_errno_with_message!(Errno::EINVAL, "unsupported clone namespace flags"); } + +/// Provides administrative APIs for switching to existing namespaces. +pub trait ContextSetNsAdminApi { + /// Sets the namespace proxy for this context. + fn set_ns_proxy(&self, ns_proxy: Arc); +} + +impl ContextSetNsAdminApi for Context<'_> { + fn set_ns_proxy(&self, ns_proxy: Arc) { + let mut pthread_ns_proxy = self.posix_thread.ns_proxy().lock(); + let mut thread_local_ns_proxy = self.thread_local.borrow_ns_proxy_mut(); + + // TODO: When setting a specific namespace, + // other dependent fields of a posix thread may also need to be updated. + + *pthread_ns_proxy = Some(ns_proxy.clone()); + thread_local_ns_proxy.replace(Some(ns_proxy)); + } +} diff --git a/kernel/src/process/pid_file.rs b/kernel/src/process/pid_file.rs index 516f133f6..a2deb2951 100644 --- a/kernel/src/process/pid_file.rs +++ b/kernel/src/process/pid_file.rs @@ -57,7 +57,7 @@ impl PidFile { self.is_nonblocking.load(Ordering::Relaxed) } - pub(super) fn process(&self) -> &Arc { + pub fn process(&self) -> &Arc { &self.process } } diff --git a/kernel/src/syscall/arch/loongarch.rs b/kernel/src/syscall/arch/loongarch.rs index 437959b85..5660dd03e 100644 --- a/kernel/src/syscall/arch/loongarch.rs +++ b/kernel/src/syscall/arch/loongarch.rs @@ -117,6 +117,7 @@ use super::{ setgid::sys_setgid, setgroups::sys_setgroups, setitimer::{sys_getitimer, sys_setitimer}, + setns::sys_setns, setpgid::sys_setpgid, setregid::sys_setregid, setresgid::sys_setresgid, @@ -326,6 +327,7 @@ impl_syscall_nums_and_dispatch_fn! { SYS_ACCEPT4 = 242 => sys_accept4(args[..4]); SYS_WAIT4 = 260 => sys_wait4(args[..4]); SYS_PRLIMIT64 = 261 => sys_prlimit64(args[..4]); + SYS_SETNS = 268 => sys_setns(args[..2]); SYS_SCHED_SETATTR = 274 => sys_sched_setattr(args[..3]); SYS_SCHED_GETATTR = 275 => sys_sched_getattr(args[..4]); SYS_GETRANDOM = 278 => sys_getrandom(args[..3]); diff --git a/kernel/src/syscall/arch/riscv.rs b/kernel/src/syscall/arch/riscv.rs index a0eee4bcf..b1930168a 100644 --- a/kernel/src/syscall/arch/riscv.rs +++ b/kernel/src/syscall/arch/riscv.rs @@ -117,6 +117,7 @@ use super::{ setgid::sys_setgid, setgroups::sys_setgroups, setitimer::{sys_getitimer, sys_setitimer}, + setns::sys_setns, setpgid::sys_setpgid, setregid::sys_setregid, setresgid::sys_setresgid, @@ -328,6 +329,7 @@ impl_syscall_nums_and_dispatch_fn! { SYS_ACCEPT4 = 242 => sys_accept4(args[..4]); SYS_WAIT4 = 260 => sys_wait4(args[..4]); SYS_PRLIMIT64 = 261 => sys_prlimit64(args[..4]); + SYS_SETNS = 268 => sys_setns(args[..2]); SYS_SCHED_SETATTR = 274 => sys_sched_setattr(args[..3]); SYS_SCHED_GETATTR = 275 => sys_sched_getattr(args[..4]); SYS_GETRANDOM = 278 => sys_getrandom(args[..3]); diff --git a/kernel/src/syscall/arch/x86.rs b/kernel/src/syscall/arch/x86.rs index fb891ca2f..0183529b6 100644 --- a/kernel/src/syscall/arch/x86.rs +++ b/kernel/src/syscall/arch/x86.rs @@ -129,6 +129,7 @@ use super::{ setgid::sys_setgid, setgroups::sys_setgroups, setitimer::{sys_getitimer, sys_setitimer}, + setns::sys_setns, setpgid::sys_setpgid, setregid::sys_setregid, setresgid::sys_setresgid, @@ -376,6 +377,7 @@ impl_syscall_nums_and_dispatch_fn! { SYS_PREADV = 295 => sys_preadv(args[..4]); SYS_PWRITEV = 296 => sys_pwritev(args[..4]); SYS_PRLIMIT64 = 302 => sys_prlimit64(args[..4]); + SYS_SETNS = 308 => sys_setns(args[..2]); SYS_GETCPU = 309 => sys_getcpu(args[..3]); SYS_SCHED_SETATTR = 314 => sys_sched_setattr(args[..3]); SYS_SCHED_GETATTR = 315 => sys_sched_getattr(args[..4]); diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 9d2a1ed5e..d7101ec45 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -145,6 +145,7 @@ mod setfsuid; mod setgid; mod setgroups; mod setitimer; +mod setns; mod setpgid; mod setregid; mod setresgid; diff --git a/kernel/src/syscall/setns.rs b/kernel/src/syscall/setns.rs new file mode 100644 index 000000000..1304c0722 --- /dev/null +++ b/kernel/src/syscall/setns.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! This module implements the `setns` syscall. +//! +//! This syscall reassociates the calling thread with a namespace specified by a +//! file descriptor. The `flags` argument determines which type of namespace can be +//! joined. +//! +//! The file descriptor `fd` can refer to: +//! 1. A namespace file from `/proc/[pid]/ns/`. +//! 2. A `PidFile` opened by `pidfd_open` or by opening `/proc/[pid]` directory. + +use crate::{ + fs::file_table::FileDesc, + net::UtsNamespace, + prelude::*, + process::{ + check_unsupported_ns_flags, credentials::capabilities::CapSet, posix_thread::AsPosixThread, + CloneFlags, ContextSetNsAdminApi, NsProxy, NsProxyBuilder, PidFile, + }, + syscall::SyscallReturn, +}; + +pub fn sys_setns(fd: FileDesc, flags: u32, ctx: &Context) -> Result { + let ns_type_flags = CloneFlags::from_bits(flags) + .ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid `setns` flags"))?; + debug!("setns flags = {:?}", ns_type_flags); + + let file = { + let file_table = ctx.thread_local.borrow_file_table(); + let file_table_locked = file_table.unwrap().read(); + file_table_locked.get_file(fd)?.clone() + }; + + let new_ns_proxy = if let Some(pid_file) = file.downcast_ref::() { + build_proxy_from_pid_file(pid_file, ns_type_flags, ctx)? + } + // TODO: Support setting namespaces from `/proc/[pid]/ns`. + else { + return_errno_with_message!( + Errno::EINVAL, + "the FD does not refer to a supported namespace file" + ); + }; + + // Install the newly created `NsProxy`. + ctx.set_ns_proxy(Arc::new(new_ns_proxy)); + + Ok(SyscallReturn::Return(0)) +} + +fn build_proxy_from_pid_file( + pid_file: &PidFile, + flags: CloneFlags, + ctx: &Context, +) -> Result { + if flags.is_empty() { + return_errno_with_message!(Errno::EINVAL, "flags must be specified with a PID file"); + } + + // Check for any flags that are not namespace-related. + if !(flags - CloneFlags::CLONE_NS_FLAGS).is_empty() { + return_errno_with_message!(Errno::EINVAL, "invalid flags are specified with a PID file"); + } + + if flags.contains(CloneFlags::CLONE_NEWUSER) { + return_errno_with_message!(Errno::EINVAL, "setting a user namespace is not supported"); + } + + check_unsupported_ns_flags(flags)?; + + let target_thread = pid_file.process().main_thread(); + let target_proxy = target_thread.as_posix_thread().unwrap().ns_proxy().lock(); + let Some(target_proxy) = target_proxy.as_ref() else { + return_errno_with_message!(Errno::ESRCH, "the target process has exited"); + }; + + let current_proxy = ctx.thread_local.borrow_ns_proxy(); + let current_proxy = current_proxy.unwrap(); + + let mut builder = NsProxyBuilder::new(current_proxy); + + if flags.contains(CloneFlags::CLONE_NEWUTS) { + let target_ns = target_proxy.uts_ns(); + set_uts_ns(&mut builder, target_ns, ctx)?; + } + + // TODO: Support setting other namespaces from the target process. + + Ok(builder.build()) +} + +fn set_uts_ns( + builder: &mut NsProxyBuilder, + target_ns: &Arc, + ctx: &Context, +) -> Result<()> { + // Verify the thread has SYS_ADMIN capability in the target namespace's owner + // and the current user namespace. + target_ns + .owner_ns() + .check_cap(CapSet::SYS_ADMIN, ctx.posix_thread)?; + ctx.thread_local + .borrow_user_ns() + .check_cap(CapSet::SYS_ADMIN, ctx.posix_thread)?; + + // TODO: Are the checks above sufficient? + + builder.uts_ns(target_ns.clone()); + + Ok(()) +} diff --git a/test/src/apps/namespace/setns.c b/test/src/apps/namespace/setns.c new file mode 100644 index 000000000..1c4b1234b --- /dev/null +++ b/test/src/apps/namespace/setns.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: MPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#include "../test.h" + +FN_TEST(set_ns_empty_flags) +{ + // FIXME: The following test will fail on Asterinas + // because it currently does not support ns file. + // const char *ns_path = "/proc/self/ns/user"; + // int fd_ns = TEST_SUCC(open(ns_path, O_RDONLY)); + // TEST_ERRNO(setns(fd_ns, 0), EINVAL); + // TEST_SUCC(close(fd_ns)); + + pid_t pid = getpid(); + int pidfd = TEST_SUCC(syscall(SYS_pidfd_open, pid, 0)); + TEST_ERRNO(setns(pidfd, 0), EINVAL); + TEST_SUCC(close(pidfd)); +} +END_TEST() + +FN_TEST(set_self_ns) +{ + // It is not permitted to use setns() to reenter the caller's + // current user namespace. This is different from other namespaces. + // FIXME: The following test will fail on Asterinas + // because it currently does not support ns file. + // const char *ns_path = "/proc/self/ns/user"; + // int fd_ns = TEST_SUCC(open(ns_path, O_RDONLY)); + // TEST_ERRNO(setns(fd_ns, CLONE_NEWUSER), EINVAL); + // TEST_SUCC(close(fd_ns)); + + pid_t pid = getpid(); + int pidfd = TEST_SUCC(syscall(SYS_pidfd_open, pid, 0)); + TEST_ERRNO(setns(pidfd, CLONE_NEWUSER), EINVAL); + TEST_SUCC(close(pidfd)); +} +END_TEST() diff --git a/test/src/apps/scripts/process.sh b/test/src/apps/scripts/process.sh index c03a1a5a4..7f5024f75 100755 --- a/test/src/apps/scripts/process.sh +++ b/test/src/apps/scripts/process.sh @@ -33,6 +33,7 @@ mmap/mmap_beyond_the_file mmap/mmap_shared_filebacked mmap/mmap_readahead mmap/mmap_vmrss +namespace/setns namespace/unshare process/group_session process/job_control