diff --git a/book/src/kernel/linux-compatibility/README.md b/book/src/kernel/linux-compatibility/README.md index 10511a602..f4d44dda7 100644 --- a/book/src/kernel/linux-compatibility/README.md +++ b/book/src/kernel/linux-compatibility/README.md @@ -346,6 +346,7 @@ which are summarized in the table below. | 434 | pidfd_open | ✅ | 💯 | | 435 | clone3 | ✅ | [⚠️](syscall-flag-coverage/process-and-thread-management/#clone-and-clone3) | | 436 | close_range | ✅ | 💯 | +| 438 | pidfd_getfd | ✅ | 💯 | | 439 | faccessat2 | ✅ | [⚠️](syscall-flag-coverage/file-and-directory-operations/#faccessat2) | | 441 | epoll_pwait2 | ✅ | 💯 | | 452 | fchmodat2 | ✅ | 💯 | diff --git a/book/src/kernel/linux-compatibility/syscall-flag-coverage/file-descriptor-and-io-control/fully_covered.scml b/book/src/kernel/linux-compatibility/syscall-flag-coverage/file-descriptor-and-io-control/fully_covered.scml index adb5de003..0b0d712a5 100644 --- a/book/src/kernel/linux-compatibility/syscall-flag-coverage/file-descriptor-and-io-control/fully_covered.scml +++ b/book/src/kernel/linux-compatibility/syscall-flag-coverage/file-descriptor-and-io-control/fully_covered.scml @@ -32,5 +32,8 @@ select(nfds, readfds, writefds, exceptfds, timeout); // Obtain a file descriptor that refers to a process pidfd_open(pid, flags = PIDFD_NONBLOCK); +// Obtain a duplicate of another process's file descriptor +pidfd_getfd(pidfd, targetfd, flags = 0); + // Close all file descriptors in the inclusive range [first, last] close_range(first, last, flags = CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC); \ No newline at end of file diff --git a/kernel/src/syscall/arch/generic.rs b/kernel/src/syscall/arch/generic.rs index 6dcd5c737..e01389fdd 100644 --- a/kernel/src/syscall/arch/generic.rs +++ b/kernel/src/syscall/arch/generic.rs @@ -80,6 +80,7 @@ macro_rules! 
import_generic_syscall_entries { munmap::sys_munmap, nanosleep::{sys_clock_nanosleep, sys_nanosleep}, open::sys_openat, + pidfd_getfd::sys_pidfd_getfd, pidfd_open::sys_pidfd_open, pipe::sys_pipe2, ppoll::sys_ppoll, @@ -392,6 +393,7 @@ macro_rules! define_syscalls_with_generic_syscall_table { SYS_PIDFD_OPEN = 434 => sys_pidfd_open(args[..2]); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_CLOSE_RANGE = 436 => sys_close_range(args[..3]); + SYS_PIDFD_GETFD = 438 => sys_pidfd_getfd(args[..3]); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]); SYS_FCHMODAT2 = 452 => sys_fchmodat2(args[..4]); diff --git a/kernel/src/syscall/arch/x86.rs b/kernel/src/syscall/arch/x86.rs index d19719391..4157bca91 100644 --- a/kernel/src/syscall/arch/x86.rs +++ b/kernel/src/syscall/arch/x86.rs @@ -81,6 +81,7 @@ use super::{ nanosleep::{sys_clock_nanosleep, sys_nanosleep}, open::{sys_creat, sys_open, sys_openat}, pause::sys_pause, + pidfd_getfd::sys_pidfd_getfd, pidfd_open::sys_pidfd_open, pipe::{sys_pipe, sys_pipe2}, poll::sys_poll, @@ -407,6 +408,7 @@ impl_syscall_nums_and_dispatch_fn! 
{ SYS_PIDFD_OPEN = 434 => sys_pidfd_open(args[..2]); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_CLOSE_RANGE = 436 => sys_close_range(args[..3]); + SYS_PIDFD_GETFD = 438 => sys_pidfd_getfd(args[..3]); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]); SYS_FCHMODAT2 = 452 => sys_fchmodat2(args[..4]); diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 601d335ad..b6b57b2d3 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -93,6 +93,7 @@ mod munmap; mod nanosleep; mod open; mod pause; +mod pidfd_getfd; mod pidfd_open; mod pipe; mod poll; diff --git a/kernel/src/syscall/pidfd_getfd.rs b/kernel/src/syscall/pidfd_getfd.rs new file mode 100644 index 000000000..b0cfdf952 --- /dev/null +++ b/kernel/src/syscall/pidfd_getfd.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: MPL-2.0 + +use crate::{ + fs::file_table::{FdFlags, FileDesc, get_file_fast}, + prelude::*, + process::{PidFile, credentials::capabilities::CapSet, posix_thread::AsPosixThread}, + syscall::SyscallReturn, +}; + +pub fn sys_pidfd_getfd( + pidfd: FileDesc, + targetfd: FileDesc, + flags: u32, + ctx: &Context, +) -> Result<SyscallReturn> { + // The `flags` argument is reserved for future use. Currently, it must be specified as 0. 
+ if flags != 0 { + return_errno_with_message!(Errno::EINVAL, "invalid flags"); + } + debug!( + "pidfd_getfd: pidfd={}, targetfd={}, flags={}", + pidfd, targetfd, flags + ); + + let mut file_table = ctx.thread_local.borrow_file_table_mut(); + let file = get_file_fast!(&mut file_table, pidfd); + let Some(pid_file) = file.downcast_ref::<PidFile>() else { + return_errno_with_message!(Errno::EBADF, "the file is not a PID file"); + }; + + let process = pid_file + .process_opt() + .ok_or_else(|| Error::with_message(Errno::ESRCH, "the target process has been reaped"))?; + + // The calling process should have PTRACE_MODE_ATTACH_REALCREDS permissions (see ptrace(2)) + // over the process referred to by `pidfd`. + // Currently, this is implemented as requiring the calling process to have the + // CAP_SYS_PTRACE capability, which is stricter. + // TODO: Implement appropriate PTRACE_MODE_ATTACH_REALCREDS permission check. + if process + .user_ns() + .lock() + .check_cap(CapSet::SYS_PTRACE, ctx.posix_thread) + .is_err() + { + return_errno_with_message!( + Errno::EPERM, + "the calling process does not have the required permissions" + ); + } + + let main_thread = process.main_thread(); + + // Get the file description corresponding to the file descriptor `targetfd` in the process + // referred to by the PID file. + let target_file_table = main_thread.as_posix_thread().unwrap().file_table(); + let target_file = target_file_table + .lock() + .as_ref() + .ok_or_else(|| Error::with_message(Errno::ESRCH, "the target process has exited"))? + .read() + .get_file(targetfd)? + .clone(); + + // Duplicate the file descriptor into the caller's file descriptor table. 
+ let new_fd = { + let mut file_table_locked = file_table.unwrap().write(); + file_table_locked.insert(target_file, FdFlags::CLOEXEC) + }; + + Ok(SyscallReturn::Return(new_fd as _)) +} diff --git a/test/initramfs/src/apps/process/pidfd_getfd.c b/test/initramfs/src/apps/process/pidfd_getfd.c new file mode 100644 index 000000000..ade6e9294 --- /dev/null +++ b/test/initramfs/src/apps/process/pidfd_getfd.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../test.h" + +static int pidfd; +static pid_t pid; +static int fd; +static int target_fd; +static const char *TESTFILE = "/tmp/pidfd_getfd_testfile"; +static int invalid_pidfd = -1; + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +static int pidfd_getfd(int pidfd, int targetfd, unsigned int flags) +{ + return syscall(SYS_pidfd_getfd, pidfd, targetfd, flags); +} + +FN_TEST(pidfd_getfd_valid) +{ + size_t size = sizeof(int); + volatile int *shared_fd = + TEST_SUCC(mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0)); + + pid = TEST_SUCC(fork()); + if (pid == 0) { + fd = CHECK(open(TESTFILE, O_CREAT | O_RDWR | O_TRUNC, 0644)); + + CHECK_WITH(write(fd, "Test content\n", 13), _ret == 13); + *shared_fd = fd; + + pause(); + exit(0); + } + + while (*shared_fd == 0) { + usleep(100); + } + fd = *shared_fd; + + pidfd = TEST_SUCC(pidfd_open(pid, 0)); + target_fd = TEST_SUCC(pidfd_getfd(pidfd, fd, 0)); + + char buffer[128] = { 0 }; + TEST_RES(pread(target_fd, buffer, sizeof(buffer), 0), + strcmp(buffer, "Test content\n") == 0); + + TEST_SUCC(munmap((void *)shared_fd, size)); + TEST_SUCC(close(target_fd)); + TEST_SUCC(close(pidfd)); + TEST_SUCC(unlink(TESTFILE)); + + TEST_SUCC(kill(pid, SIGKILL)); + TEST_SUCC(waitpid(pid, NULL, 0)); +} +END_TEST() + +FN_TEST(pidfd_getfd_after_child_exits) +{ + pid = 
TEST_SUCC(fork()); + if (pid == 0) { + exit(0); + } + TEST_SUCC(waitid(P_PID, pid, NULL, + WNOWAIT | WEXITED)); // Ensure the child has exited + pidfd = TEST_SUCC(pidfd_open(pid, 0)); + + TEST_ERRNO(pidfd_getfd(pidfd, fd, 0), ESRCH); + TEST_SUCC(waitpid(pid, NULL, 0)); + TEST_ERRNO(pidfd_getfd(pidfd, fd, 0), ESRCH); + + TEST_SUCC(close(pidfd)); +} +END_TEST() + +FN_TEST(pidfd_getfd_errnos) +{ + pid = TEST_SUCC(fork()); + if (pid == 0) { + exit(0); + } + TEST_SUCC(waitid(P_PID, pid, NULL, + WNOWAIT | WEXITED)); // Ensure the child has exited + pidfd = TEST_SUCC(pidfd_open(pid, 0)); + + TEST_ERRNO(pidfd_getfd(invalid_pidfd, fd, 0), EBADF); + TEST_ERRNO(pidfd_getfd(pidfd, -1, 0), ESRCH); + TEST_ERRNO(pidfd_getfd(pidfd, fd, 1), EINVAL); + + TEST_SUCC(waitpid(pid, NULL, 0)); + TEST_SUCC(close(pidfd)); +} +END_TEST() \ No newline at end of file diff --git a/test/initramfs/src/apps/scripts/process.sh b/test/initramfs/src/apps/scripts/process.sh index adeb29332..f3bd28bda 100755 --- a/test/initramfs/src/apps/scripts/process.sh +++ b/test/initramfs/src/apps/scripts/process.sh @@ -48,6 +48,7 @@ namespace/unshare process/group_session process/job_control process/pidfd +process/pidfd_getfd process/wait4 procfs/dentry_cache procfs/pid_mem diff --git a/test/initramfs/src/syscall/ltp/testcases/all.txt b/test/initramfs/src/syscall/ltp/testcases/all.txt index f1d6e4c3b..40e91dced 100644 --- a/test/initramfs/src/syscall/ltp/testcases/all.txt +++ b/test/initramfs/src/syscall/ltp/testcases/all.txt @@ -1042,7 +1042,7 @@ pathconf01 # personality02 # pidfd_getfd01 -# pidfd_getfd02 +pidfd_getfd02 pidfd_open01 pidfd_open02