Align `PidFile` semantics with Linux

This commit is contained in:
li041 2026-01-26 06:16:34 +00:00 committed by Ruihan Li
parent 425cc2d1cf
commit 306cc714ae
7 changed files with 131 additions and 34 deletions

View File

@ -8,7 +8,7 @@ use crate::{
fs::{ fs::{
inode_handle::{FileIo, InodeHandle}, inode_handle::{FileIo, InodeHandle},
pipe::Pipe, pipe::Pipe,
pseudofs::{PipeFs, PseudoInode}, pseudofs::{PipeFs, PseudoInode, PseudoInodeType},
utils::{ utils::{
AccessMode, Extension, FileSystem, Inode, InodeIo, InodeMode, InodeType, Metadata, AccessMode, Extension, FileSystem, Inode, InodeIo, InodeMode, InodeType, Metadata,
StatusFlags, mkmod, StatusFlags, mkmod,
@ -46,7 +46,7 @@ impl AnonPipeInode {
let pipe = Pipe::new(); let pipe = Pipe::new();
let pseudo_inode = PipeFs::singleton().alloc_inode( let pseudo_inode = PipeFs::singleton().alloc_inode(
InodeType::NamedPipe, PseudoInodeType::Pipe,
mkmod!(u+rw), mkmod!(u+rw),
Uid::new_root(), Uid::new_root(),
Gid::new_root(), Gid::new_root(),

View File

@ -71,7 +71,7 @@ impl PseudoFs {
sb: SuperBlock::new(magic, aster_block::BLOCK_SIZE, NAME_MAX), sb: SuperBlock::new(magic, aster_block::BLOCK_SIZE, NAME_MAX),
root: Arc::new(PseudoInode::new( root: Arc::new(PseudoInode::new(
ROOT_INO, ROOT_INO,
InodeType::Dir, PseudoInodeType::Root,
mkmod!(u+rw), mkmod!(u+rw),
Uid::new_root(), Uid::new_root(),
Gid::new_root(), Gid::new_root(),
@ -85,7 +85,7 @@ impl PseudoFs {
pub fn alloc_inode( pub fn alloc_inode(
self: &Arc<Self>, self: &Arc<Self>,
type_: InodeType, type_: PseudoInodeType,
mode: InodeMode, mode: InodeMode,
uid: Uid, uid: Uid,
gid: Gid, gid: Gid,
@ -140,7 +140,7 @@ impl SockFs {
/// Creates a pseudo `Path` for a socket. /// Creates a pseudo `Path` for a socket.
pub fn new_path() -> Path { pub fn new_path() -> Path {
let socket_inode = Arc::new(Self::singleton().alloc_inode( let socket_inode = Arc::new(Self::singleton().alloc_inode(
InodeType::Socket, PseudoInodeType::Socket,
mkmod!(a+rwx), mkmod!(a+rwx),
Uid::new_root(), Uid::new_root(),
Gid::new_root(), Gid::new_root(),
@ -206,7 +206,7 @@ impl AnonInodeFs {
SHARED_INODE.call_once(|| { SHARED_INODE.call_once(|| {
let shared_inode = Self::singleton().alloc_inode( let shared_inode = Self::singleton().alloc_inode(
InodeType::Unknown, PseudoInodeType::AnonInode,
mkmod!(u+rw), mkmod!(u+rw),
Uid::new_root(), Uid::new_root(),
Gid::new_root(), Gid::new_root(),
@ -217,6 +217,51 @@ impl AnonInodeFs {
} }
} }
/// The pseudo file system that backs pidfds.
///
/// All functionality is exposed through associated functions (`singleton`,
/// `new_path`, `mount_node`, `shared_inode`). The private unit field prevents
/// the type from being constructed outside this module.
pub struct PidfdFs {
_private: (),
}
impl PidfdFs {
    /// Returns the singleton `PseudoFs` instance used for pidfds.
    ///
    /// The instance is obtained via `PseudoFs::singleton` with the name
    /// `"pidfdfs"` and the magic number `PIDFDFS_MAGIC`.
    pub fn singleton() -> &'static Arc<PseudoFs> {
        static INSTANCE: Once<Arc<PseudoFs>> = Once::new();

        PseudoFs::singleton(&INSTANCE, "pidfdfs", PIDFDFS_MAGIC)
    }

    /// Builds a pseudo `Path` for a pidfd.
    ///
    /// The path combines the pidfd mount node with the shared pidfd inode;
    /// `name_fn` is handed over to `Path::new_pseudo` unchanged.
    pub fn new_path(name_fn: fn(&dyn Inode) -> String) -> Path {
        let mount = Arc::clone(Self::mount_node());
        let inode = Arc::clone(Self::shared_inode());

        Path::new_pseudo(mount, inode, name_fn)
    }

    /// Returns the pseudo mount node of the pidfd file system.
    ///
    /// The mount is created from the file system singleton on the first call
    /// and reused afterwards.
    pub fn mount_node() -> &'static Arc<Mount> {
        static MOUNT: Once<Arc<Mount>> = Once::new();

        MOUNT.call_once(|| {
            let fs = Arc::clone(Self::singleton());
            Mount::new_pseudo(fs)
        })
    }

    /// Returns the inode shared by all paths created by [`Self::new_path`].
    ///
    /// The inode is allocated on the first call with type
    /// `PseudoInodeType::Pidfd`, mode `u+rwx`, and root as owner and group.
    pub fn shared_inode() -> &'static Arc<dyn Inode> {
        static INODE: Once<Arc<dyn Inode>> = Once::new();

        INODE.call_once(|| {
            let inode: Arc<dyn Inode> = Arc::new(Self::singleton().alloc_inode(
                PseudoInodeType::Pidfd,
                mkmod!(u+rwx),
                Uid::new_root(),
                Gid::new_root(),
            ));
            inode
        })
    }
}
pub(super) fn init() { pub(super) fn init() {
super::registry::register(&PipeFsType).unwrap(); super::registry::register(&PipeFsType).unwrap();
super::registry::register(&SockFsType).unwrap(); super::registry::register(&SockFsType).unwrap();
@ -282,24 +327,50 @@ const PIPEFS_MAGIC: u64 = 0x50495045;
const SOCKFS_MAGIC: u64 = 0x534F434B; const SOCKFS_MAGIC: u64 = 0x534F434B;
// Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/include/uapi/linux/magic.h#L93> // Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/include/uapi/linux/magic.h#L93>
const ANON_INODEFS_MAGIC: u64 = 0x09041934; const ANON_INODEFS_MAGIC: u64 = 0x09041934;
// Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/include/uapi/linux/magic.h#L105>
const PIDFDFS_MAGIC: u64 = 0x50494446;
/// The kind of inode that a pseudo file system can allocate.
///
/// Each variant is mapped to a generic `InodeType` by the
/// `From<PseudoInodeType> for InodeType` implementation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PseudoInodeType {
/// The root inode of a `PseudoFs`.
Root,
/// The inode of an anonymous pipe, allocated from `PipeFs`.
Pipe,
/// The inode of a socket, allocated from `SockFs`.
Socket,
/// The shared inode of `AnonInodeFs`.
AnonInode,
/// The shared inode of `PidfdFs`.
Pidfd,
}
impl From<PseudoInodeType> for InodeType {
    /// Maps a pseudo inode kind to its generic `InodeType`.
    fn from(kind: PseudoInodeType) -> Self {
        match kind {
            PseudoInodeType::Root => Self::Dir,
            PseudoInodeType::Pipe => Self::NamedPipe,
            PseudoInodeType::Socket => Self::Socket,
            // Anonymous inodes and pidfd inodes both map to the unknown type.
            PseudoInodeType::AnonInode | PseudoInodeType::Pidfd => Self::Unknown,
        }
    }
}
/// A pseudo inode that does not correspond to any real path in the file system. /// A pseudo inode that does not correspond to any real path in the file system.
pub struct PseudoInode { pub struct PseudoInode {
metadata: SpinLock<Metadata>, metadata: SpinLock<Metadata>,
extension: Extension, extension: Extension,
fs: Weak<PseudoFs>, fs: Weak<PseudoFs>,
is_anon: bool,
} }
impl PseudoInode { impl PseudoInode {
fn new( fn new(
ino: u64, ino: u64,
type_: InodeType, type_: PseudoInodeType,
mode: InodeMode, mode: InodeMode,
uid: Uid, uid: Uid,
gid: Gid, gid: Gid,
fs: Weak<PseudoFs>, fs: Weak<PseudoFs>,
) -> Self { ) -> Self {
let now = now(); let now = now();
let type_ = InodeType::from(type_);
let metadata = Metadata { let metadata = Metadata {
dev: 0, dev: 0,
ino, ino,
@ -316,10 +387,12 @@ impl PseudoInode {
gid, gid,
rdev: 0, rdev: 0,
}; };
PseudoInode { PseudoInode {
metadata: SpinLock::new(metadata), metadata: SpinLock::new(metadata),
extension: Extension::new(), extension: Extension::new(),
fs, fs,
is_anon: type_ == InodeType::Unknown,
} }
} }
} }
@ -380,6 +453,13 @@ impl Inode for PseudoInode {
} }
fn set_mode(&self, mode: InodeMode) -> Result<()> { fn set_mode(&self, mode: InodeMode) -> Result<()> {
if self.is_anon {
return_errno_with_message!(
Errno::EOPNOTSUPP,
"the mode of anonymous inodes cannot be changed"
);
}
let mut meta = self.metadata.lock(); let mut meta = self.metadata.lock();
meta.mode = mode; meta.mode = mode;
meta.ctime = now(); meta.ctime = now();

View File

@ -11,7 +11,7 @@ use crate::{
file_handle::FileLike, file_handle::FileLike,
file_table::FdFlags, file_table::FdFlags,
path::Path, path::Path,
pseudofs::AnonInodeFs, pseudofs::PidfdFs,
utils::{CreationFlags, StatusFlags}, utils::{CreationFlags, StatusFlags},
}, },
prelude::*, prelude::*,
@ -43,7 +43,7 @@ impl Debug for PidFile {
impl PidFile { impl PidFile {
pub fn new(process: Arc<Process>, is_nonblocking: bool) -> Self { pub fn new(process: Arc<Process>, is_nonblocking: bool) -> Self {
let pseudo_path = AnonInodeFs::new_path(|_| "anon_inode:[pidfd]".to_string()); let pseudo_path = PidfdFs::new_path(|_| "anon_inode:[pidfd]".to_string());
Self { Self {
process: Arc::downgrade(&process), process: Arc::downgrade(&process),
@ -59,7 +59,7 @@ impl PidFile {
// Reference: <https://man7.org/linux/man-pages/man2/pidfd_open.2.html>. // Reference: <https://man7.org/linux/man-pages/man2/pidfd_open.2.html>.
let Some(process) = self.process.upgrade() else { let Some(process) = self.process.upgrade() else {
// The process has been reaped. // The process has been reaped.
return IoEvents::IN; return IoEvents::IN | IoEvents::HUP;
}; };
if process.status().is_zombie() { if process.status().is_zombie() {
IoEvents::IN IoEvents::IN
@ -86,6 +86,20 @@ impl FileLike for PidFile {
return_errno_with_message!(Errno::EINVAL, "PID file cannot be written"); return_errno_with_message!(Errno::EINVAL, "PID file cannot be written");
} }
fn read_at(&self, _offset: usize, _writer: &mut VmWriter) -> Result<usize> {
return_errno_with_message!(
Errno::EINVAL,
"PID file cannot be read at a specific offset"
);
}
fn write_at(&self, _offset: usize, _reader: &mut VmReader) -> Result<usize> {
return_errno_with_message!(
Errno::EINVAL,
"PID file cannot be written at a specific offset"
);
}
fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> { fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> {
if new_flags.contains(StatusFlags::O_NONBLOCK) { if new_flags.contains(StatusFlags::O_NONBLOCK) {
self.is_nonblocking.store(true, Ordering::Relaxed); self.is_nonblocking.store(true, Ordering::Relaxed);
@ -118,8 +132,8 @@ impl FileLike for PidFile {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
writeln!(f, "pos:\t{}", 0)?; writeln!(f, "pos:\t{}", 0)?;
writeln!(f, "flags:\t0{:o}", self.flags)?; writeln!(f, "flags:\t0{:o}", self.flags)?;
writeln!(f, "mnt_id:\t{}", AnonInodeFs::mount_node().id())?; writeln!(f, "mnt_id:\t{}", PidfdFs::mount_node().id())?;
writeln!(f, "ino:\t{}", AnonInodeFs::shared_inode().ino())?; writeln!(f, "ino:\t{}", PidfdFs::shared_inode().ino())?;
writeln!(f, "Pid:\t{}", self.pid)?; writeln!(f, "Pid:\t{}", self.pid)?;
// TODO: Currently we do not support PID namespaces. Just print the PID once. // TODO: Currently we do not support PID namespaces. Just print the PID once.
writeln!(f, "NSpid:\t{}", self.pid) writeln!(f, "NSpid:\t{}", self.pid)
@ -140,7 +154,7 @@ impl Pollable for PidFile {
fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents { fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents {
let Some(process) = self.process.upgrade() else { let Some(process) = self.process.upgrade() else {
// The process has been reaped. // The process has been reaped.
return mask & IoEvents::IN; return mask & (IoEvents::IN | IoEvents::HUP);
}; };
process process
.pidfile_pollee .pidfile_pollee

View File

@ -20,6 +20,7 @@ use super::{
task_set::TaskSet, task_set::TaskSet,
}; };
use crate::{ use crate::{
events::IoEvents,
fs::cgroupfs::CgroupNode, fs::cgroupfs::CgroupNode,
prelude::*, prelude::*,
process::{ process::{
@ -151,6 +152,12 @@ pub struct Process {
user_ns: Mutex<Arc<UserNamespace>>, user_ns: Mutex<Arc<UserNamespace>>,
} }
impl Drop for Process {
// Notifies `pidfile_pollee` with `IoEvents::HUP` when the `Process` is dropped.
//
// NOTE(review): presumably this wakes pollers of pidfds that refer to this
// process so that they re-poll and observe the hang-up once the process is
// gone — confirm against `PidFile::poll`.
fn drop(&mut self) {
self.pidfile_pollee.notify(IoEvents::HUP);
}
}
/// Representing a parent process by holding a weak reference to it and its PID. /// Representing a parent process by holding a weak reference to it and its PID.
/// ///
/// This type caches the value of the PID so that it can be retrieved cheaply. /// This type caches the value of the PID so that it can be retrieved cheaply.

View File

@ -36,9 +36,9 @@ FN_TEST(read_write)
{ {
char buf[1] = {}; char buf[1] = {};
TEST_ERRNO(read(pid_fd, buf, 1), EINVAL); TEST_ERRNO(read(pid_fd, buf, 1), EINVAL);
TEST_ERRNO(pread(pid_fd, buf, 1, 0), ESPIPE); TEST_ERRNO(pread(pid_fd, buf, 1, 0), EINVAL);
TEST_ERRNO(write(pid_fd, "a", 1), EINVAL); TEST_ERRNO(write(pid_fd, "a", 1), EINVAL);
TEST_ERRNO(pwrite(pid_fd, "b", 1, 0), ESPIPE); TEST_ERRNO(pwrite(pid_fd, "b", 1, 0), EINVAL);
} }
END_TEST() END_TEST()
@ -55,7 +55,7 @@ FN_TEST(file_stat)
{ {
struct stat file_info; struct stat file_info;
TEST_RES(fstat(pid_fd, &file_info), TEST_RES(fstat(pid_fd, &file_info),
file_info.st_mode == 0600 && file_info.st_size == 0 && file_info.st_mode == 0700 && file_info.st_size == 0 &&
file_info.st_blksize == 4096); file_info.st_blksize == 4096);
} }
END_TEST() END_TEST()
@ -78,9 +78,10 @@ END_TEST()
FN_TEST(wait) FN_TEST(wait)
{ {
#define P_PIDFD 3 #define P_PIDFD 3
TEST_SUCC(waitid(P_PIDFD, pid_fd, NULL, WNOHANG | WEXITED)); TEST_SUCC(waitid(P_PIDFD, pid_fd, NULL, WEXITED | WNOWAIT));
pfd.revents = 0;
TEST_RES(poll(&pfd, 1, 0), pfd.revents == POLLIN); TEST_RES(poll(&pfd, 1, 0), pfd.revents == POLLIN);
TEST_SUCC(waitid(P_PIDFD, pid_fd, NULL, WEXITED));
TEST_RES(poll(&pfd, 1, 0), pfd.revents == (POLLIN | POLLHUP));
TEST_ERRNO(waitid(P_PIDFD, pid_fd, NULL, WNOHANG | WEXITED), ECHILD); TEST_ERRNO(waitid(P_PIDFD, pid_fd, NULL, WNOHANG | WEXITED), ECHILD);
TEST_ERRNO(waitid(P_PIDFD, 100, NULL, WNOHANG | WEXITED), EBADF); TEST_ERRNO(waitid(P_PIDFD, 100, NULL, WNOHANG | WEXITED), EBADF);

View File

@ -54,19 +54,17 @@ FN_TEST(anon_inodefs_share_inode)
{ {
struct fd_mode { struct fd_mode {
int fd; int fd;
mode_t modes[2]; mode_t mode;
}; };
struct fd_mode fds[] = { struct fd_mode fds[] = {
{ epoll_fd, { 0600, 0000 } }, { event_fd, { 0000, 0111 } }, { epoll_fd, 0600 }, { event_fd, 0600 }, { timer_fd, 0600 },
{ timer_fd, { 0111, 0222 } }, { signal_fd, { 0222, 0333 } }, { signal_fd, 0600 }, { inotify_fd, 0600 }, { pid_fd, 0700 },
{ inotify_fd, { 0333, 0444 } }, { pid_fd, { 0444, 0600 } },
}; };
for (size_t i = 0; i < sizeof(fds) / sizeof(fds[0]); i++) { for (size_t i = 0; i < sizeof(fds) / sizeof(fds[0]); i++) {
TEST_RES(get_mode(fds[i].fd), _ret == fds[i].modes[0]); TEST_RES(get_mode(fds[i].fd), _ret == fds[i].mode);
TEST_SUCC(set_mode(fds[i].fd, fds[i].modes[1])); TEST_ERRNO(set_mode(fds[i].fd, 0600), EOPNOTSUPP);
TEST_RES(get_mode(fds[i].fd), _ret == fds[i].modes[1]);
} }
} }
END_TEST() END_TEST()

View File

@ -24,8 +24,7 @@ static int read_fdinfo_mnt_id(int fd)
FN_TEST(pseudo_mount) FN_TEST(pseudo_mount)
{ {
int anon[] = { epoll_fd, event_fd, timer_fd, int anon[] = { epoll_fd, event_fd, timer_fd, signal_fd, inotify_fd };
signal_fd, inotify_fd, pid_fd };
struct fd_group { struct fd_group {
int *fds; int *fds;
@ -34,13 +33,11 @@ FN_TEST(pseudo_mount)
}; };
struct fd_group groups[] = { struct fd_group groups[] = {
{ pipe_1, 2, -1 }, { pipe_1, 2, -1 }, { sock, 2, -1 }, { anon, 5, -1 },
{ sock, 2, -1 }, { &mem_fd, 1, -1 }, { &pid_fd, 1, -1 },
{ anon, 6, -1 },
{ &mem_fd, 1, -1 },
}; };
for (int i = 0; i < 4; i++) { for (int i = 0; i < 5; i++) {
int base = TEST_SUCC(read_fdinfo_mnt_id(groups[i].fds[0])); int base = TEST_SUCC(read_fdinfo_mnt_id(groups[i].fds[0]));
for (int j = 1; j < groups[i].nr; j++) { for (int j = 1; j < groups[i].nr; j++) {
TEST_RES(read_fdinfo_mnt_id(groups[i].fds[j]), TEST_RES(read_fdinfo_mnt_id(groups[i].fds[j]),
@ -49,8 +46,8 @@ FN_TEST(pseudo_mount)
groups[i].mnt_id = base; groups[i].mnt_id = base;
} }
for (int i = 0; i < 4; i++) { for (int i = 0; i < 5; i++) {
for (int j = i + 1; j < 4; j++) { for (int j = i + 1; j < 5; j++) {
TEST_RES(0, groups[i].mnt_id != groups[j].mnt_id); TEST_RES(0, groups[i].mnt_id != groups[j].mnt_id);
} }
} }