This commit is contained in:
Chengjun Chen 2026-02-11 14:15:57 +08:00 committed by GitHub
commit 546a52a734
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 363 additions and 15 deletions

View File

@ -175,7 +175,7 @@ which are summarized in the table below.
| 152 | munlockall | ❌ | N/A |
| 153 | vhangup | ❌ | N/A |
| 154 | modify_ldt | ❌ | N/A |
| 155 | pivot_root | ❌ | N/A |
| 155 | pivot_root | ✅ | 💯 |
| 156 | _sysctl | ❌ | N/A |
| 157 | prctl | ✅ | [⚠️](syscall-flag-coverage/namespaces-cgroups-and-security/#prctl) |
| 158 | arch_prctl | ✅ | [⚠️](syscall-flag-coverage/system-information-and-misc/#arch_prctl) |

View File

@ -14,3 +14,6 @@ chroot(path);
// Remove a watch from an inotify instance
inotify_rm_watch(fd, wd);
// Change the root mount in the mount namespace of the calling thread
pivot_root(new_root, put_old);

View File

@ -31,7 +31,7 @@
engine = {
cgroup_manager = "cgroupfs";
events_logger = "none";
no_pivot_root = true;
no_pivot_root = false;
runtime = "runc";
};
};

View File

@ -159,7 +159,7 @@ impl Path {
/// For example, first `mount /dev/sda1 /mnt` and then `mount /dev/sda2 /mnt`.
/// After the second mount is completed, the content of the first mount will be overridden.
/// We need to recursively obtain the top `Path`.
pub(super) fn get_top_path(mut self) -> Self {
fn get_top_path(mut self) -> Self {
while self.dentry.is_mountpoint() {
if let Some(child_mount) = self.mount.get(&self.dentry) {
let inner = child_mount.root_dentry().clone();
@ -173,13 +173,47 @@ impl Path {
}
/// Finds the corresponding `Path` in the given mount namespace.
pub(super) fn find_corresponding_mount(&self, mnt_ns: &Arc<MountNamespace>) -> Option<Self> {
fn find_corresponding_mount(&self, mnt_ns: &Arc<MountNamespace>) -> Option<Self> {
let corresponding_mount = self.mount.find_corresponding_mount(mnt_ns)?;
let corresponding_path = Self::new(corresponding_mount, self.dentry.clone());
Some(corresponding_path)
}
/// Tells whether this path lies at or below the given `root` path.
///
/// Starting from `self`, the walk repeatedly replaces the current path by
/// the mount point of its mount (expressed as a path in the parent mount)
/// for as long as the current mount differs from the mount of `root`.
/// If a mount without a live parent is reached first, the path is not
/// reachable. Once both paths are on the same mount, the answer is whether
/// the current dentry is equal to, or a descendant of, the dentry of `root`.
fn is_reachable_from(&self, root: &Path) -> bool {
    // Backing storage for the paths synthesized while climbing the mount tree.
    let mut climbed;
    let mut cursor = self;

    while cursor.mount.id() != root.mount.id() {
        // No (live) parent mount: we ran out of mounts without meeting `root`.
        let Some(parent_mount) = cursor.mount.parent().and_then(|weak| weak.upgrade()) else {
            return false;
        };
        let mountpoint = cursor
            .mount
            .mountpoint()
            .expect("Mounts with parents must have a mount point")
            .clone();

        climbed = Path::new(parent_mount, mountpoint);
        cursor = &climbed;
    }

    cursor.dentry.is_equal_or_descendant_of(&root.dentry)
}
/// Returns true if the `Path` represents a pseudo file.
fn is_pseudo(&self) -> bool {
self.dentry.is_pseudo()
@ -333,7 +367,7 @@ impl Path {
}
let new_mount = self.mount.clone_mount_tree(&self.dentry, None, recursive);
new_mount.graft_mount_tree(dst_path)?;
new_mount.graft_mount_tree(dst_path);
Ok(())
}
@ -369,7 +403,9 @@ impl Path {
);
}
self.mount.graft_mount_tree(dst_path)
self.mount.graft_mount_tree(dst_path);
Ok(())
}
/// Sets the propagation type of the mount of this `Path`.

View File

@ -420,10 +420,9 @@ impl Mount {
}
/// Grafts the mount node tree to the mountpoint.
pub(super) fn graft_mount_tree(&self, target_path: &Path) -> Result<()> {
pub(super) fn graft_mount_tree(&self, target_path: &Path) {
self.detach_from_parent();
self.attach_to_path(target_path);
Ok(())
}
/// Gets a child mount node from the mountpoint if any.

View File

@ -12,7 +12,7 @@ use crate::{
utils::{InodeType, NAME_MAX, PATH_MAX, Permission, SYMLINKS_MAX, SymbolicLink},
},
prelude::*,
process::posix_thread::AsThreadLocal,
process::posix_thread::{AsPosixThread, AsThreadLocal, thread_table::ThreadTable},
};
/// The file descriptor of the current working directory.
@ -228,6 +228,121 @@ impl PathResolver {
Ok(())
}
/// Changes the root mount in the mount namespace of the calling thread.
///
/// This function moves the original root mount of the calling thread to `put_old_path` and makes
/// `new_root_path` the new root mount. For every other thread in the current mount namespace,
/// the root directory and the current working directory are checked independently: each of them
/// is switched to `new_root_path` if it is equal to the calling thread's old root directory.
/// The calling thread's own root is always switched; its current working directory is switched
/// only if it was equal to the old root.
///
/// # Errors
///
/// Besides any error produced while looking up the two paths, the checks below are performed
/// in this order and fail with:
/// - `ENOTDIR` if `new_root_path` or `put_old_path` is not a directory;
/// - `EBUSY` if `new_root_path` or `put_old_path` is on the current root mount;
/// - `EINVAL` if `new_root_path` or the current root is not the root of its mount;
/// - `EINVAL` if the mount of `new_root_path` or of the current root has no parent mount;
/// - `EINVAL` if `new_root_path` or `put_old_path` is not in the current mount namespace;
/// - `EINVAL` if `put_old_path` is not at or underneath `new_root_path`;
/// - `EINVAL` if `new_root_path` is not underneath the current root.
//
// TODO: this method should only iterate threads in the current PID namespace instead of
// the whole thread table.
pub fn pivot_root(
&mut self,
new_root_path: FsPath,
put_old_path: FsPath,
thread_table: &ThreadTable,
ctx: &Context,
) -> Result<()> {
// Resolve both arguments with this resolver; from here on the names refer to
// the resolved `Path`s rather than to the user-supplied `FsPath`s.
let new_root_path = self.lookup(&new_root_path)?;
let put_old_path = self.lookup(&put_old_path)?;
if new_root_path.type_() != InodeType::Dir || put_old_path.type_() != InodeType::Dir {
return_errno_with_message!(
Errno::ENOTDIR,
"`new_root` or `put_old` is not a directory"
);
}
if self.root.mount.id() == new_root_path.mount.id()
|| self.root.mount.id() == put_old_path.mount.id()
{
return_errno_with_message!(
Errno::EBUSY,
"`new_root` or `put_old` is on the current root mount"
);
}
if !new_root_path.is_mount_root() || !self.root.is_mount_root() {
return_errno_with_message!(
Errno::EINVAL,
"`new_root` or the current root is not a mount point"
);
}
// Both mounts must be attached below some parent mount; this also guarantees that the
// `parent().unwrap()` used further down cannot fail on a missing parent.
if new_root_path.mount.parent().is_none() || self.root.mount.parent().is_none() {
return_errno_with_message!(
Errno::EINVAL,
"`new_root` or the current root is on the rootfs mount"
);
}
let current_ns_proxy = ctx.thread_local.borrow_ns_proxy();
let current_mnt_ns = current_ns_proxy.unwrap().mnt_ns();
if !current_mnt_ns.owns(&new_root_path.mount) || !current_mnt_ns.owns(&put_old_path.mount) {
return_errno_with_message!(
Errno::EINVAL,
"`new_root` or `put_old` is not in the current mount namespace"
);
}
if !put_old_path.is_reachable_from(&new_root_path) {
return_errno_with_message!(
Errno::EINVAL,
"`put_old` is not at or underneath `new_root`"
);
}
if !new_root_path.is_reachable_from(&self.root) {
return_errno_with_message!(
Errno::EINVAL,
"`new_root` is not underneath the current root"
);
}
// TODO: Check the following once we support `MS_SHARED`:
// "The propagation type of the parent mount of `new_root` and the
// parent mount of the current root directory must not be
// `MS_SHARED`; similarly, if `put_old` is an existing mount point,
// its propagation type must not be `MS_SHARED`."
// Record where the current root mount is attached *before* it is moved: the new root
// mount is attached at exactly this place afterwards.
// NOTE(review): `upgrade().unwrap()` assumes the parent mount is still alive -- confirm.
let parent_path = {
let parent_mount = self.root.mount.parent().unwrap().upgrade().unwrap();
let mountpoint = self.root.mount.mountpoint().unwrap();
Path::new(parent_mount, mountpoint)
};
// First move the old root mount tree onto `put_old`, then move the new root mount tree
// to the place where the old root used to be attached.
self.root.mount.graft_mount_tree(&put_old_path);
new_root_path.mount.graft_mount_tree(&parent_path);
// Fix up the root/cwd of the other threads that live in the same mount namespace.
for thread in thread_table.values() {
let posix_thread = thread.as_posix_thread().unwrap();
let ns_proxy = posix_thread.ns_proxy().lock();
// Threads that currently have no namespace proxy are left untouched.
let Some(ns_proxy) = ns_proxy.as_ref() else {
continue;
};
let mnt_ns = ns_proxy.mnt_ns();
if !Arc::ptr_eq(mnt_ns, current_mnt_ns) {
continue;
}
let fs = posix_thread.read_fs();
// Threads sharing the caller's FS information (including the caller itself) are
// handled by the update of `self` at the end of this method.
// NOTE(review): `sys_pivot_root` calls this method while holding the write lock of
// that resolver, so taking it again here would presumably deadlock -- confirm.
if Arc::ptr_eq(&fs, &ctx.thread_local.borrow_fs()) {
continue;
}
let mut fs_resolver = fs.resolver().write();
// Root and cwd are compared against the caller's *old* root, independently of each other.
if fs_resolver.root() == &self.root {
fs_resolver.set_root(new_root_path.clone());
}
if fs_resolver.cwd() == &self.root {
fs_resolver.set_cwd(new_root_path.clone());
}
}
// Finally update the caller's own resolver. The cwd must be compared before `self.root`
// is overwritten, since the comparison is against the old root.
if self.cwd == self.root {
self.cwd = new_root_path.clone();
}
self.root = new_root_path;
Ok(())
}
}
/// The result of resolving an absolute path name.

View File

@ -3,20 +3,22 @@
use super::{Thread, Tid};
use crate::{prelude::*, process::posix_thread::AsPosixThread};
static THREAD_TABLE: SpinLock<BTreeMap<Tid, Arc<Thread>>> = SpinLock::new(BTreeMap::new());
pub type ThreadTable = BTreeMap<Tid, Arc<Thread>>;
/// Adds a posix thread to global thread table
static THREAD_TABLE: Mutex<ThreadTable> = Mutex::new(BTreeMap::new());
/// Registers `thread` in the global thread table under the key `tid`.
///
/// In debug builds, this asserts that `tid` equals the TID reported by the
/// thread's POSIX thread data.
pub fn add_thread(tid: Tid, thread: Arc<Thread>) {
    debug_assert_eq!(tid, thread.as_posix_thread().unwrap().tid());

    let mut table = THREAD_TABLE.lock();
    table.insert(tid, thread);
}
/// Removes a posix thread to global thread table
/// Drops the entry keyed by `tid` from the global thread table.
///
/// Nothing happens if the table has no entry for `tid`.
pub fn remove_thread(tid: Tid) {
    let mut table = THREAD_TABLE.lock();
    table.remove(&tid);
}
/// Gets a posix thread from the global thread table
/// Looks up `tid` in the global thread table.
///
/// Returns a new reference to the thread, or `None` if the table has no
/// entry for `tid`.
pub fn get_thread(tid: Tid) -> Option<Arc<Thread>> {
    let table = THREAD_TABLE.lock();
    table.get(&tid).cloned()
}
@ -47,3 +49,12 @@ pub(in crate::process) fn make_current_main_thread(ctx: &Context) {
let thread = thread_table.remove(&old_tid).unwrap();
thread_table.insert(pid, thread);
}
/// Runs `f` with shared access to the global thread table and returns its result.
///
/// The lock of the global thread table is acquired before `f` is called and
/// stays held until `f` has returned.
pub fn with_global_threads<F, R>(f: F) -> R
where
    F: FnOnce(&ThreadTable) -> R,
{
    let guard = THREAD_TABLE.lock();
    let threads: &ThreadTable = &guard;
    f(threads)
}

View File

@ -84,6 +84,7 @@ macro_rules! import_generic_syscall_entries {
pidfd_open::sys_pidfd_open,
pidfd_send_signal::sys_pidfd_send_signal,
pipe::sys_pipe2,
pivot_root::sys_pivot_root,
ppoll::sys_ppoll,
prctl::sys_prctl,
pread64::sys_pread64,
@ -234,6 +235,7 @@ macro_rules! define_syscalls_with_generic_syscall_table {
SYS_LINKAT = 37 => sys_linkat(args[..5]);
SYS_UMOUNT = 39 => sys_umount(args[..2]);
SYS_MOUNT = 40 => sys_mount(args[..5]);
SYS_PIVOT_ROOT = 41 => sys_pivot_root(args[..2]);
SYS_STATFS = 43 => sys_statfs(args[..2]);
SYS_FSTATFS = 44 => sys_fstatfs(args[..2]);
SYS_TRUNCATE = 45 => sys_truncate(args[..2]);

View File

@ -85,6 +85,7 @@ use super::{
pidfd_open::sys_pidfd_open,
pidfd_send_signal::sys_pidfd_send_signal,
pipe::{sys_pipe, sys_pipe2},
pivot_root::sys_pivot_root,
poll::sys_poll,
ppoll::sys_ppoll,
prctl::sys_prctl,
@ -309,6 +310,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_SCHED_GETSCHEDULER = 145 => sys_sched_getscheduler(args[..1]);
SYS_SCHED_GET_PRIORITY_MAX = 146 => sys_sched_get_priority_max(args[..1]);
SYS_SCHED_GET_PRIORITY_MIN = 147 => sys_sched_get_priority_min(args[..1]);
SYS_PIVOT_ROOT = 155 => sys_pivot_root(args[..2]);
SYS_PRCTL = 157 => sys_prctl(args[..5]);
SYS_ARCH_PRCTL = 158 => sys_arch_prctl(args[..2], &mut user_ctx);
SYS_SETRLIMIT = 160 => sys_setrlimit(args[..2]);

View File

@ -97,6 +97,7 @@ mod pidfd_getfd;
mod pidfd_open;
mod pidfd_send_signal;
mod pipe;
mod pivot_root;
mod poll;
mod ppoll;
mod prctl;

View File

@ -0,0 +1,41 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::path::FsPath, prelude::*, process::posix_thread::thread_table::with_global_threads,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `pivot_root` system call.
///
/// `new_root_ptr` and `put_old_ptr` are user-space addresses of the two path
/// strings. Both strings are read (at most `MAX_FILENAME_LEN` bytes each),
/// converted to `FsPath`s, and handed to the `pivot_root` method of the
/// calling thread's path resolver. Returns `0` on success.
pub fn sys_pivot_root(
    new_root_ptr: Vaddr,
    put_old_ptr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    // Fetch both C strings from user memory, `new_root` first.
    let new_root_cstr = ctx
        .user_space()
        .read_cstring(new_root_ptr, MAX_FILENAME_LEN)?;
    let put_old_cstr = ctx
        .user_space()
        .read_cstring(put_old_ptr, MAX_FILENAME_LEN)?;
    debug!(
        "pivot_root: new_root = {:?}, put_old = {:?}",
        new_root_cstr, put_old_cstr
    );

    // Turn the raw strings into `FsPath`s, again `new_root` first.
    let new_root_str = new_root_cstr.to_string_lossy();
    let new_root = FsPath::try_from(new_root_str.as_ref())?;
    let put_old_str = put_old_cstr.to_string_lossy();
    let put_old = FsPath::try_from(put_old_str.as_ref())?;

    // TODO: Locking the global thread table here is a workaround. We need to use a more
    // suitable lock (i.e. the global mount lock or the namespace lock) to avoid deadlock.
    with_global_threads(move |thread_table| {
        // The resolver is write-locked only after the thread table lock is held.
        let fs_info = ctx.thread_local.borrow_fs();
        let mut resolver = fs_info.resolver().write();
        resolver.pivot_root(new_root, put_old, thread_table, ctx)
    })?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -15,7 +15,6 @@ TEST_BUILD_DIR ?= $(INITRAMFS)/test
TEST_APPS := \
alarm \
capability \
chroot \
clone3 \
cpu_affinity \
devfs \
@ -27,6 +26,7 @@ TEST_APPS := \
fdatasync \
file_io \
fork_c \
fs_isolation \
getcpu \
getpid \
hello_pie \

View File

@ -0,0 +1,137 @@
// SPDX-License-Identifier: MPL-2.0
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include "../test.h"
// --- Test Configuration ---
#define CHROOT_DIR "/new_root"
#define PIVOT_TARGET_DIR "/second_root"
#define PUT_OLD_DIR_NAME "old_root"
// Marker directories and files to verify the pivot operation.
// These will be backed by tmpfs to ensure they are on distinct filesystems.
#define OLD_ROOT_MARKER_MNT "/old_root_marker_mnt"
#define OLD_ROOT_MARKER_FILE OLD_ROOT_MARKER_MNT "/old.txt"
#define NEW_ROOT_MARKER_MNT "/new_root_marker_mnt"
#define NEW_ROOT_MARKER_FILE NEW_ROOT_MARKER_MNT "/new.txt"
// Helper to create a directory if it doesn't exist.
//
// Calls `mkdir(path, 0755)` and accepts two outcomes: the directory was
// created, or it already existed (`EEXIST`). Any other `errno` makes the
// check fail.
// NOTE(review): the `errno == 0` half of the condition presumably relies on
// `CHECK_WITH` (from `../test.h`) clearing `errno` before the call -- confirm.
static void ensure_dir(const char *path)
{
CHECK_WITH(mkdir(path, 0755), errno == 0 || errno == EEXIST);
}
// Helper to create a marker file on a tmpfs mount.
//
// Makes sure `mount_point` exists, mounts a tmpfs on it, and then creates
// `file_path` (mode 0644) by opening it with `O_CREAT | O_WRONLY` and closing
// it right away. Callers pass a `file_path` located under `mount_point`, so
// that the marker ends up on the newly mounted tmpfs; this relationship is
// not verified here.
static void create_marker(const char *mount_point, const char *file_path)
{
ensure_dir(mount_point);
CHECK(mount("tmpfs", mount_point, "tmpfs", 0, ""));
// Only the existence of the file matters, so it is closed without writing.
int fd = CHECK(open(file_path, O_CREAT | O_WRONLY, 0644));
CHECK(close(fd));
}
// End-to-end test of `pivot_root`.
//
// The test enters a private mount namespace, builds a chroot environment out
// of recursive bind mounts, exercises several failing `pivot_root` calls, and
// finally performs a successful `pivot_root`. Marker files on dedicated tmpfs
// mounts are used to tell the old root and the new root apart afterwards.
FN_TEST(pivot_root_test)
{
// --- Phase 1: Setup a chroot environment with a nested bind mount ---
// Use a private mount namespace for all the mount changes made below.
TEST_SUCC(unshare(CLONE_NEWNS));
ensure_dir(CHROOT_DIR);
TEST_SUCC(mount("/", CHROOT_DIR, NULL, MS_BIND | MS_REC, NULL));
char pivot_target_full_path[256];
snprintf(pivot_target_full_path, sizeof(pivot_target_full_path), "%s%s",
CHROOT_DIR, PIVOT_TARGET_DIR);
ensure_dir(pivot_target_full_path);
// Negative test: pivot_root should fail if the root mount is the root mount of rootfs.
// (This runs before `chroot`, i.e. while the process root is still the original one.)
TEST_ERRNO(syscall(SYS_pivot_root, pivot_target_full_path,
pivot_target_full_path),
EINVAL);
TEST_SUCC(chroot(CHROOT_DIR));
TEST_SUCC(chdir("/")); // Change to the `new_root` after chroot.
// --- Phase 2: Prepare for and execute pivot_root ---
// From here on, all absolute paths are interpreted inside the chroot.
TEST_SUCC(mount("/", PIVOT_TARGET_DIR, NULL, MS_BIND | MS_REC, NULL));
// Create the directory where the old root will be placed.
char put_old_full_path[256];
snprintf(put_old_full_path, sizeof(put_old_full_path), "%s/%s",
PIVOT_TARGET_DIR, PUT_OLD_DIR_NAME);
TEST_SUCC(mkdir(put_old_full_path, 0755));
// Mount a tmpfs on `put_old_full_path`. This makes it a different mount
// from `new_root`.
TEST_SUCC(mount("tmpfs", put_old_full_path, "tmpfs", 0, NULL));
// Create marker files on separate tmpfs mounts to verify the pivot.
// The old-root marker is created under the current root, the new-root
// marker under the pivot target.
create_marker(OLD_ROOT_MARKER_MNT, OLD_ROOT_MARKER_FILE);
char new_root_marker_mnt_full_path[256];
snprintf(new_root_marker_mnt_full_path,
sizeof(new_root_marker_mnt_full_path), "%s%s",
PIVOT_TARGET_DIR, NEW_ROOT_MARKER_MNT);
char new_root_marker_file_full_path[256];
snprintf(new_root_marker_file_full_path,
sizeof(new_root_marker_file_full_path), "%s%s",
PIVOT_TARGET_DIR, NEW_ROOT_MARKER_FILE);
create_marker(new_root_marker_mnt_full_path,
new_root_marker_file_full_path);
// --- Phase 3: Negative tests for pivot_root ---
// pivot_root fails with ENOTDIR if the `new_root` or `put_old` is not a directory.
// (Here `new_root` is the regular marker file.)
TEST_ERRNO(syscall(SYS_pivot_root, new_root_marker_file_full_path,
put_old_full_path),
ENOTDIR);
// pivot_root fails with EINVAL if the `put_old` is not underneath the `new_root`.
TEST_ERRNO(syscall(SYS_pivot_root, "./proc", put_old_full_path),
EINVAL);
// pivot_root fails with EINVAL if the `new_root` is not a mount root.
TEST_ERRNO(syscall(SYS_pivot_root, "./sys/fs", "./sys/fs"), EINVAL);
// pivot_root fails with EBUSY if the `new_root` is the current root.
TEST_ERRNO(syscall(SYS_pivot_root, ".", "./bin"), EBUSY);
// --- Phase 4: Do pivot_root and verification ---
// Perform the pivot_root operation.
TEST_SUCC(syscall(SYS_pivot_root, PIVOT_TARGET_DIR, put_old_full_path));
// After pivot, the cwd is changed to the `new_root`.
char cwd[1024];
TEST_RES(syscall(SYS_getcwd, cwd, sizeof(cwd)), strcmp(cwd, "/") == 0);
// Verify that the `new_root` is active by checking for its marker file.
TEST_SUCC(access(NEW_ROOT_MARKER_FILE, F_OK));
// Verify that the old root has been moved.
char old_marker_in_new_path[256];
snprintf(old_marker_in_new_path, sizeof(old_marker_in_new_path),
"/%s%s", PUT_OLD_DIR_NAME, OLD_ROOT_MARKER_FILE);
TEST_SUCC(access(old_marker_in_new_path, F_OK));
// Verify that the old root marker is no longer at the root.
TEST_ERRNO(access(OLD_ROOT_MARKER_FILE, F_OK), ENOENT);
// --- Phase 5: Cleanup ---
char old_root_path[256];
snprintf(old_root_path, sizeof(old_root_path), "/%s", PUT_OLD_DIR_NAME);
// Two mounts are stacked on this directory, so it is unmounted twice.
// NOTE(review): presumably the first (lazy) unmount detaches the old root
// tree moved here by pivot_root, and the second one removes the tmpfs
// mounted in Phase 2 -- confirm.
TEST_SUCC(umount2(old_root_path, MNT_DETACH));
TEST_SUCC(umount(old_root_path));
TEST_SUCC(rmdir(old_root_path));
}
END_TEST()

View File

@ -10,7 +10,6 @@ cd ${SCRIPT_DIR}/..
echo "Start process test......"
# These test programs are sorted by name.
tests="
chroot/chroot_jail
clone3/clone_exit_signal
clone3/clone_files
clone3/clone_no_exit_signal
@ -26,6 +25,8 @@ exit/exit_procfs
eventfd2/eventfd2
fork/fork
fork_c/fork
fs_isolation/chroot
fs_isolation/pivot_root
getcpu/getcpu
getpid/getpid
hello_pie/hello