Add syscall unshare

This commit is contained in:
jiangjianfeng 2025-08-04 09:37:45 +00:00 committed by Tate, Hongliang Tian
parent 83b6e2da5c
commit cca73480c2
15 changed files with 305 additions and 1 deletions

View File

@ -292,7 +292,7 @@ provided by Linux on x86-64 architecture.
| 269 | faccessat | ✅ | |
| 270 | pselect6 | ✅ | |
| 271 | ppoll | ✅ | |
| 272 | unshare | ❌ | |
| 272 | unshare | ✅ | |
| 273 | set_robust_list | ✅ | |
| 274 | get_robust_list | ❌ | |
| 275 | splice | ❌ | |

View File

@ -82,6 +82,13 @@ impl<'a> CurrentUserSpace<'a> {
self.0.as_ref().unwrap()
}
/// Reports whether the VMAR is also used by other processes or threads.
///
/// The answer is derived from the current reference count of the root VMAR.
pub fn is_vmar_shared(&self) -> bool {
    // An unshared VMAR is referenced exactly twice: once by `ThreadLocal`
    // and once by the `ProcessVm` stored in `Process`.
    const UNSHARED_REFERENCE_COUNT: usize = 2;

    let reference_count = self.root_vmar().reference_count();
    reference_count != UNSHARED_REFERENCE_COUNT
}
/// Creates a reader to read data from the user space of the current task.
///
/// Returns `Err` if `vaddr` and `len` do not represent a user space memory range.

View File

@ -26,6 +26,7 @@ pub use credentials::{Credentials, Gid, Uid};
pub use kill::{kill, kill_all, kill_group, tgkill};
pub use namespace::{
nsproxy::{check_unsupported_ns_flags, ContextNsAdminApi, NsProxy, NsProxyBuilder},
unshare::ContextUnshareAdminApi,
user_ns::UserNamespace,
};
pub use pid_file::PidFile;

View File

@ -1,4 +1,5 @@
// SPDX-License-Identifier: MPL-2.0
pub(super) mod nsproxy;
pub(super) mod unshare;
pub(super) mod user_ns;

View File

@ -0,0 +1,66 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::sync::RwArc;
use crate::{prelude::*, process::CloneFlags};
/// Provides administrative APIs for disassociating execution contexts.
///
/// Each method detaches one kind of state of the calling context so that
/// later changes to it are no longer made through the previously used object.
pub trait ContextUnshareAdminApi {
/// Unshares the file table.
///
/// The context continues with a copy of the file table it was using.
fn unshare_files(&self);
/// Unshares filesystem attributes.
///
/// The context continues with a copy of the filesystem information it was using.
fn unshare_fs(&self);
/// Unshares System V semaphore.
fn unshare_sysvsem(&self);
/// Creates and enters new namespaces as specified by the `flags` argument.
///
/// # Errors
///
/// Returns an error if the requested namespaces cannot be created.
fn unshare_namespaces(&self, flags: CloneFlags) -> Result<()>;
}
impl ContextUnshareAdminApi for Context<'_> {
    /// Replaces the file table of the current thread with a private copy.
    ///
    /// Both the slot kept by the POSIX thread and the thread-local slot are
    /// redirected to the freshly copied table.
    fn unshare_files(&self) {
        // Lock the POSIX-thread slot first, then borrow the thread-local one.
        let mut shared_slot = self.posix_thread.file_table().lock();
        let mut local_slot = self.thread_local.borrow_file_table_mut();
        let local_table = local_slot.unwrap();

        // Copy the table while holding only a short-lived read guard.
        let private_table = RwArc::new(local_table.read().clone());

        *shared_slot = Some(private_table.clone_ro());
        *local_table = private_table;
    }

    /// Replaces the thread-local filesystem information with a private copy.
    fn unshare_fs(&self) {
        let mut fs_slot = self.thread_local.borrow_fs_mut();
        let private_fs = Arc::new(fs_slot.as_ref().clone());
        *fs_slot = private_fs;
    }

    /// Placeholder for unsharing System V semaphores; only logs a warning.
    fn unshare_sysvsem(&self) {
        // TODO: Support unsharing System V semaphore.
        warn!("unsharing System V semaphore is not supported");
    }

    /// Builds a new namespace proxy according to `flags` and installs it.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` if `flags` contains `CLONE_NEWUSER`, and forwards
    /// any error reported while cloning the namespace proxy.
    fn unshare_namespaces(&self, flags: CloneFlags) -> Result<()> {
        let wants_new_user_ns = flags.contains(CloneFlags::CLONE_NEWUSER);
        if wants_new_user_ns {
            return_errno_with_message!(
                Errno::EINVAL,
                "cloning a new user namespace is not supported"
            );
        }

        // Acquisition order: user namespace, POSIX-thread slot, thread-local slot.
        let user_ns = self.thread_local.borrow_user_ns();
        let mut shared_slot = self.posix_thread.ns_proxy().lock();
        let mut local_slot = self.thread_local.borrow_ns_proxy_mut();
        let local_proxy = local_slot.unwrap();

        let fresh_proxy = local_proxy.new_clone(&user_ns, flags, self.posix_thread)?;

        *shared_slot = Some(fresh_proxy.clone());
        *local_proxy = fresh_proxy;

        Ok(())
    }
}

View File

@ -148,6 +148,7 @@ use super::{
umount::sys_umount,
uname::sys_uname,
unlink::sys_unlinkat,
unshare::sys_unshare,
utimens::sys_utimensat,
wait4::sys_wait4,
waitid::sys_waitid,
@ -232,6 +233,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_EXIT_GROUP = 94 => sys_exit_group(args[..1]);
SYS_WAITID = 95 => sys_waitid(args[..5]);
SYS_SET_TID_ADDRESS = 96 => sys_set_tid_address(args[..1]);
SYS_UNSHARE = 97 => sys_unshare(args[..1]);
SYS_FUTEX = 98 => sys_futex(args[..6]);
SYS_SET_ROBUST_LIST = 99 => sys_set_robust_list(args[..2]);
SYS_NANOSLEEP = 101 => sys_nanosleep(args[..2]);

View File

@ -148,6 +148,7 @@ use super::{
umount::sys_umount,
uname::sys_uname,
unlink::sys_unlinkat,
unshare::sys_unshare,
utimens::sys_utimensat,
wait4::sys_wait4,
waitid::sys_waitid,
@ -232,6 +233,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_EXIT_GROUP = 94 => sys_exit_group(args[..1]);
SYS_WAITID = 95 => sys_waitid(args[..5]);
SYS_SET_TID_ADDRESS = 96 => sys_set_tid_address(args[..1]);
SYS_UNSHARE = 97 => sys_unshare(args[..1]);
SYS_FUTEX = 98 => sys_futex(args[..6]);
SYS_SET_ROBUST_LIST = 99 => sys_set_robust_list(args[..2]);
SYS_NANOSLEEP = 101 => sys_nanosleep(args[..2]);

View File

@ -161,6 +161,7 @@ use super::{
umount::sys_umount,
uname::sys_uname,
unlink::{sys_unlink, sys_unlinkat},
unshare::sys_unshare,
utimens::{sys_futimesat, sys_utime, sys_utimensat, sys_utimes},
wait4::sys_wait4,
waitid::sys_waitid,
@ -356,6 +357,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_FACCESSAT = 269 => sys_faccessat(args[..3]);
SYS_PSELECT6 = 270 => sys_pselect6(args[..6]);
SYS_PPOLL = 271 => sys_ppoll(args[..5]);
SYS_UNSHARE = 272 => sys_unshare(args[..1]);
SYS_SET_ROBUST_LIST = 273 => sys_set_robust_list(args[..2]);
SYS_UTIMENSAT = 280 => sys_utimensat(args[..4]);
SYS_EPOLL_PWAIT = 281 => sys_epoll_pwait(args[..6]);

View File

@ -177,6 +177,7 @@ mod umask;
mod umount;
mod uname;
mod unlink;
mod unshare;
mod utimens;
mod wait4;
mod waitid;

View File

@ -0,0 +1,82 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{
prelude::*,
process::{CloneFlags, ContextUnshareAdminApi},
syscall::SyscallReturn,
};
pub fn sys_unshare(unshare_flags: u32, ctx: &Context) -> Result<SyscallReturn> {
let mut flags = CloneFlags::from_bits(unshare_flags)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid `unshare` flags"))?;
debug!("unshare flags = {:?}", flags);
apply_implied_flags(&mut flags);
check_flags(flags, ctx)?;
if flags.contains(CloneFlags::CLONE_FILES) {
ctx.unshare_files();
}
if flags.contains(CloneFlags::CLONE_FS) {
ctx.unshare_fs();
}
if flags.contains(CloneFlags::CLONE_SYSVSEM) {
ctx.unshare_sysvsem();
}
let ns_flags = flags.intersection(CloneFlags::CLONE_NS_FLAGS);
if !ns_flags.is_empty() {
ctx.unshare_namespaces(ns_flags)?;
}
Ok(SyscallReturn::Return(0))
}
/// Extends `flags` with every flag that is implied by one already present.
///
/// The implications are:
/// - `CLONE_NEWUSER` implies `CLONE_THREAD` and `CLONE_FS`;
/// - `CLONE_SIGHAND` implies `CLONE_THREAD`;
/// - `CLONE_NEWNS` implies `CLONE_FS`.
fn apply_implied_flags(flags: &mut CloneFlags) {
    // Each entry is `(trigger, flags implied by the trigger)`. No implied flag
    // is itself a trigger, so a single pass is sufficient.
    let implications = [
        (
            CloneFlags::CLONE_NEWUSER,
            CloneFlags::CLONE_THREAD | CloneFlags::CLONE_FS,
        ),
        (CloneFlags::CLONE_SIGHAND, CloneFlags::CLONE_THREAD),
        (CloneFlags::CLONE_NEWNS, CloneFlags::CLONE_FS),
    ];

    for &(trigger, implied) in &implications {
        if flags.contains(trigger) {
            flags.insert(implied);
        }
    }
}
/// Validates the `unshare` flags against the state of the calling context.
///
/// # Errors
///
/// Returns `EINVAL` if
/// - `flags` contains a flag outside the accepted set;
/// - `CLONE_THREAD`, `CLONE_VM`, or `CLONE_SIGHAND` is present while the
///   task list of the process does not contain exactly one task;
/// - `CLONE_VM` is present while the VMAR is reported as shared.
fn check_flags(flags: CloneFlags, ctx: &Context) -> Result<()> {
    const VALID_FLAGS: CloneFlags = CloneFlags::CLONE_NS_FLAGS
        .union(CloneFlags::CLONE_FILES)
        .union(CloneFlags::CLONE_FS)
        .union(CloneFlags::CLONE_SYSVSEM)
        .union(CloneFlags::CLONE_THREAD)
        .union(CloneFlags::CLONE_VM)
        .union(CloneFlags::CLONE_SIGHAND);
    const SINGLE_THREAD_ONLY_FLAGS: CloneFlags = CloneFlags::CLONE_THREAD
        .union(CloneFlags::CLONE_VM)
        .union(CloneFlags::CLONE_SIGHAND);

    // Every requested flag has to be part of the accepted set.
    if !VALID_FLAGS.contains(flags) {
        return_errno_with_message!(Errno::EINVAL, "unsupported `unshare` flags");
    }

    // The task list is inspected only when one of the relevant flags is set.
    if flags.intersects(SINGLE_THREAD_ONLY_FLAGS) {
        let task_count = ctx.process.tasks().lock().as_slice().len();
        if task_count != 1 {
            return_errno_with_message!(
                Errno::EINVAL,
                "`CLONE_THREAD`, `CLONE_VM`, and `CLONE_SIGHAND` can be specified only if the process is single-threaded"
            );
        }
    }

    if flags.contains(CloneFlags::CLONE_VM) {
        let vmar_is_shared = ctx.user_space().is_vmar_shared();
        if vmar_is_shared {
            return_errno_with_message!(
                Errno::EINVAL,
                "`CLONE_VM` can only be used when the VMAR is not shared"
            );
        }
    }

    Ok(())
}

View File

@ -129,6 +129,11 @@ impl<R> Vmar<R> {
) -> Result<Vaddr> {
self.0.remap(old_addr, old_size, new_addr, new_size)
}
/// Returns the reference count of the VMAR.
///
/// This is the strong count of the `Arc` held in this handle at the time
/// of the call.
pub fn reference_count(&self) -> usize {
Arc::strong_count(&self.0)
}
}
pub(super) struct Vmar_ {

View File

@ -30,6 +30,7 @@ TEST_APPS := \
itimer \
mmap \
mongoose \
namespace \
network \
pipe \
prctl \

View File

@ -0,0 +1,5 @@
# SPDX-License-Identifier: MPL-2.0
include ../test_common.mk
EXTRA_C_FLAGS :=

View File

@ -0,0 +1,128 @@
// SPDX-License-Identifier: MPL-2.0
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/stat.h>
#include <errno.h>
#include <pthread.h>
#include <fcntl.h>
#include "../test.h"
// Checks that `unshare` fails with `EINVAL` for each of the flags below.
FN_TEST(invalid_flags)
{
TEST_ERRNO(unshare(CLONE_CHILD_CLEARTID), EINVAL);
TEST_ERRNO(unshare(CLONE_CHILD_SETTID), EINVAL);
TEST_ERRNO(unshare(CLONE_DETACHED), EINVAL);
TEST_ERRNO(unshare(CLONE_IO), EINVAL);
TEST_ERRNO(unshare(CLONE_PARENT), EINVAL);
TEST_ERRNO(unshare(CLONE_PARENT_SETTID), EINVAL);
TEST_ERRNO(unshare(CLONE_PIDFD), EINVAL);
TEST_ERRNO(unshare(CLONE_PTRACE), EINVAL);
TEST_ERRNO(unshare(CLONE_SETTLS), EINVAL);
TEST_ERRNO(unshare(CLONE_UNTRACED), EINVAL);
TEST_ERRNO(unshare(CLONE_VFORK), EINVAL);
}
END_TEST()
// Thread start routine that sleeps for one second and then exits.
//
// It is used to keep a second thread alive for a short while. `arg` is
// unused. Always returns `NULL`.
void *sleep_1s_thread(void *arg)
{
	(void)arg;
	sleep(1);
	// A start routine must return a value; falling off the end of a
	// non-void function leaves the thread's exit value indeterminate.
	return NULL;
}
// Checks that `CLONE_VM`, `CLONE_SIGHAND`, and `CLONE_THREAD` are accepted
// only while no second thread is running.
FN_TEST(single_thread_flags)
{
// No other thread has been created yet: all three flags are accepted.
TEST_SUCC(unshare(CLONE_VM | CLONE_SIGHAND | CLONE_THREAD));
pthread_t thread_id;
// While the helper thread sleeps, each flag must be rejected.
TEST_SUCC(pthread_create(&thread_id, NULL, sleep_1s_thread, NULL));
TEST_ERRNO(unshare(CLONE_VM), EINVAL);
TEST_ERRNO(unshare(CLONE_SIGHAND), EINVAL);
TEST_ERRNO(unshare(CLONE_THREAD), EINVAL);
// After the helper thread has been joined, the flags are accepted again.
TEST_SUCC(pthread_join(thread_id, NULL));
TEST_SUCC(unshare(CLONE_VM));
TEST_SUCC(unshare(CLONE_SIGHAND));
TEST_SUCC(unshare(CLONE_THREAD));
}
END_TEST()
// Thread start routine that unshares its file table and then closes a file
// descriptor.
//
// `arg` carries the file descriptor, passed as an integer cast to a pointer.
// Always returns `NULL`.
void *unshare_files_thread(void *arg)
{
	int test_fd = (int)(intptr_t)arg;

	CHECK(unshare(CLONE_FILES));
	CHECK(close(test_fd));

	// A start routine must return a value; falling off the end of a
	// non-void function leaves the thread's exit value indeterminate.
	return NULL;
}
// Checks that `unshare(CLONE_FILES)` keeps the already opened files usable
// and that closing a file descriptor in a thread that has unshared its file
// table does not close it for the main thread.
FN_TEST(unshare_files)
{
struct stat old_stdin_stat, old_stdout_stat, old_stderr_stat;
struct stat new_stdin_stat, new_stdout_stat, new_stderr_stat;
// Record the inodes behind the standard streams before unsharing.
TEST_SUCC(fstat(STDIN_FILENO, &old_stdin_stat));
TEST_SUCC(fstat(STDOUT_FILENO, &old_stdout_stat));
TEST_SUCC(fstat(STDERR_FILENO, &old_stderr_stat));
TEST_SUCC(unshare(CLONE_FILES));
// The standard streams must still refer to the same inodes afterwards.
TEST_RES(fstat(STDIN_FILENO, &new_stdin_stat),
old_stdin_stat.st_ino == new_stdin_stat.st_ino);
TEST_RES(fstat(STDOUT_FILENO, &new_stdout_stat),
old_stdout_stat.st_ino == new_stdout_stat.st_ino);
TEST_RES(fstat(STDERR_FILENO, &new_stderr_stat),
old_stderr_stat.st_ino == new_stderr_stat.st_ino);
const char *TEST_FILENAME = "/tmp/unshare_files_test.txt";
pthread_t thread_id;
struct stat stat1, stat2;
int test_fd;
test_fd = TEST_SUCC(
open(TEST_FILENAME, O_CREAT | O_RDWR | O_TRUNC, 0644));
TEST_SUCC(fstat(test_fd, &stat1));
// The helper thread unshares its file table and closes `test_fd` there.
TEST_SUCC(pthread_create(&thread_id, NULL, unshare_files_thread,
(void *)(intptr_t)test_fd));
TEST_SUCC(pthread_join(thread_id, NULL));
// `test_fd` must still be open here and refer to the same inode.
TEST_RES(fstat(test_fd, &stat2), stat1.st_ino == stat2.st_ino);
TEST_SUCC(close(test_fd));
TEST_SUCC(unlink(TEST_FILENAME));
}
END_TEST()
#define CWD_BUF_SIZE 1024
#define THREAD_CWD "/tmp"
// Thread start routine that unshares its filesystem attributes, changes its
// working directory to `THREAD_CWD`, and reports the resulting directory.
//
// `arg` points to a buffer of at least `CWD_BUF_SIZE` bytes that receives
// the working directory as seen by this thread. Always returns `NULL`.
void *unshare_fs_thread(void *arg)
{
	CHECK(unshare(CLONE_FS));
	CHECK(chdir(THREAD_CWD));
	CHECK(getcwd((char *)arg, CWD_BUF_SIZE));

	// A start routine must return a value; falling off the end of a
	// non-void function leaves the thread's exit value indeterminate.
	return NULL;
}
// Checks that changing the working directory in a thread that has called
// `unshare(CLONE_FS)` does not change the working directory of the main
// thread.
FN_TEST(unshare_fs)
{
char cwd_buf1[CWD_BUF_SIZE], cwd_buf2[CWD_BUF_SIZE];
pthread_t thread_id;
// The test requires the initial working directory to differ from
// `THREAD_CWD`.
TEST_RES(getcwd(cwd_buf1, CWD_BUF_SIZE),
strcmp(cwd_buf1, THREAD_CWD) != 0);
TEST_SUCC(pthread_create(&thread_id, NULL, unshare_fs_thread,
(void *)cwd_buf2));
// The helper thread must have observed `THREAD_CWD` as its directory.
TEST_RES(pthread_join(thread_id, NULL),
strcmp(cwd_buf2, THREAD_CWD) == 0);
// The main thread must still be in its original directory.
TEST_RES(getcwd(cwd_buf2, CWD_BUF_SIZE),
strcmp(cwd_buf1, cwd_buf2) == 0);
}
END_TEST()

View File

@ -33,6 +33,7 @@ mmap/mmap_beyond_the_file
mmap/mmap_shared_filebacked
mmap/mmap_readahead
mmap/mmap_vmrss
namespace/unshare
process/group_session
process/job_control
process/pidfd