diff --git a/docs/src/kernel/linux-compatibility.md b/docs/src/kernel/linux-compatibility.md index ce81a3a53..5a1befe11 100644 --- a/docs/src/kernel/linux-compatibility.md +++ b/docs/src/kernel/linux-compatibility.md @@ -291,7 +291,7 @@ provided by Linux on x86-64 architecture. | 268 | fchmodat | ✅ | | 269 | faccessat | ✅ | | 270 | pselect6 | ✅ | -| 271 | ppoll | ❌ | +| 271 | ppoll | ✅ | | 272 | unshare | ❌ | | 273 | set_robust_list | ✅ | | 274 | get_robust_list | ❌ | diff --git a/kernel/src/process/signal/mod.rs b/kernel/src/process/signal/mod.rs index 8a267cc8a..c7b8dc98d 100644 --- a/kernel/src/process/signal/mod.rs +++ b/kernel/src/process/signal/mod.rs @@ -20,7 +20,7 @@ use c_types::{siginfo_t, ucontext_t}; use constants::SIGSEGV; pub use events::{SigEvents, SigEventsFilter}; use ostd::{cpu::context::UserContext, user::UserContextApi}; -pub use pause::{with_signal_blocked, Pause}; +pub use pause::{with_sigmask_changed, Pause}; pub use poll::{PollAdaptor, PollHandle, Pollable, Pollee, Poller}; use sig_action::{SigAction, SigActionFlags, SigDefaultAction}; use sig_mask::SigMask; diff --git a/kernel/src/process/signal/pause.rs b/kernel/src/process/signal/pause.rs index fa641dbe7..c51faede4 100644 --- a/kernel/src/process/signal/pause.rs +++ b/kernel/src/process/signal/pause.rs @@ -206,16 +206,22 @@ impl Pause for WaitQueue { } } -/// Executes a closure while temporarily blocking some signals for the current POSIX thread. -pub fn with_signal_blocked(ctx: &Context, mask: SigMask, operate: impl FnOnce() -> R) -> R { - let posix_thread = ctx.posix_thread; - let sig_mask = posix_thread.sig_mask(); +/// Executes a closure after temporarily adjusting the signal mask of the current POSIX thread. +pub fn with_sigmask_changed( + ctx: &Context, + mask_op: impl FnOnce(SigMask) -> SigMask, + operate: impl FnOnce() -> R, +) -> R { + let sig_mask = ctx.posix_thread.sig_mask(); + // Save the original signal mask and apply the mask updates. let old_mask = sig_mask.load(Ordering::Relaxed); - sig_mask.store(old_mask + mask, Ordering::Relaxed); + sig_mask.store(mask_op(old_mask), Ordering::Relaxed); + // Perform the operation. let res = operate(); + // Restore the original signal mask. sig_mask.store(old_mask, Ordering::Relaxed); res diff --git a/kernel/src/process/wait.rs b/kernel/src/process/wait.rs index f95f16545..6b492c496 100644 --- a/kernel/src/process/wait.rs +++ b/kernel/src/process/wait.rs @@ -2,13 +2,16 @@ #![expect(dead_code)] -use super::{process_filter::ProcessFilter, signal::constants::SIGCHLD, ExitCode, Pid, Process}; +use super::{ + process_filter::ProcessFilter, + signal::{constants::SIGCHLD, with_sigmask_changed}, + ExitCode, Pid, Process, +}; use crate::{ prelude::*, process::{ posix_thread::{thread_table, AsPosixThread}, process_table, - signal::with_signal_blocked, }, }; @@ -40,51 +43,55 @@ pub fn wait_child_exit( ctx: &Context, ) -> Result>> { let current = ctx.process; - let zombie_child = with_signal_blocked(ctx, SIGCHLD.into(), || { - current.children_wait_queue().pause_until(|| { - let unwaited_children = current - .children() - .lock() - .values() - .filter(|child| match child_filter { - ProcessFilter::Any => true, - ProcessFilter::WithPid(pid) => child.pid() == pid, - ProcessFilter::WithPgid(pgid) => child.pgid() == pgid, - }) - .cloned() - .collect::>(); + let zombie_child = with_sigmask_changed( + ctx, + |sigmask| sigmask + SIGCHLD, + || { + current.children_wait_queue().pause_until(|| { + let unwaited_children = current + .children() + .lock() + .values() + .filter(|child| match child_filter { + ProcessFilter::Any => true, + ProcessFilter::WithPid(pid) => child.pid() == pid, + ProcessFilter::WithPgid(pgid) => child.pgid() == pgid, + }) + .cloned() + .collect::>(); - if unwaited_children.is_empty() { - return Some(Err(Error::with_message( - Errno::ECHILD, - "the process has no child to wait", - ))); - } - - // return immediately if we find a zombie child - let zombie_child = unwaited_children - .iter() - .find(|child| child.status().is_zombie()); - - if let Some(zombie_child) = zombie_child { - let zombie_pid = zombie_child.pid(); - if wait_options.contains(WaitOptions::WNOWAIT) { - // does not reap child, directly return - return Some(Ok(Some(zombie_child.clone()))); - } else { - reap_zombie_child(current, zombie_pid); - return Some(Ok(Some(zombie_child.clone()))); + if unwaited_children.is_empty() { + return Some(Err(Error::with_message( + Errno::ECHILD, + "the process has no child to wait", + ))); } - } - if wait_options.contains(WaitOptions::WNOHANG) { - return Some(Ok(None)); - } + // return immediately if we find a zombie child + let zombie_child = unwaited_children + .iter() + .find(|child| child.status().is_zombie()); - // wait - None - }) - })??; + if let Some(zombie_child) = zombie_child { + let zombie_pid = zombie_child.pid(); + if wait_options.contains(WaitOptions::WNOWAIT) { + // does not reap child, directly return + return Some(Ok(Some(zombie_child.clone()))); + } else { + reap_zombie_child(current, zombie_pid); + return Some(Ok(Some(zombie_child.clone()))); + } + } + + if wait_options.contains(WaitOptions::WNOHANG) { + return Some(Ok(None)); + } + + // wait + None + }) + }, + )??; Ok(zombie_child) } diff --git a/kernel/src/syscall/arch/x86.rs b/kernel/src/syscall/arch/x86.rs index bf1bc1e10..b9556780d 100644 --- a/kernel/src/syscall/arch/x86.rs +++ b/kernel/src/syscall/arch/x86.rs @@ -73,6 +73,7 @@ use crate::syscall::{ pause::sys_pause, pipe::{sys_pipe, sys_pipe2}, poll::sys_poll, + ppoll::sys_ppoll, prctl::sys_prctl, pread64::sys_pread64, preadv::{sys_preadv, sys_preadv2, sys_readv}, @@ -336,6 +337,7 @@ impl_syscall_nums_and_dispatch_fn! { SYS_FCHMODAT = 268 => sys_fchmodat(args[..3]); SYS_FACCESSAT = 269 => sys_faccessat(args[..3]); SYS_PSELECT6 = 270 => sys_pselect6(args[..6]); + SYS_PPOLL = 271 => sys_ppoll(args[..5]); SYS_SET_ROBUST_LIST = 273 => sys_set_robust_list(args[..2]); SYS_UTIMENSAT = 280 => sys_utimensat(args[..4]); SYS_EPOLL_PWAIT = 281 => sys_epoll_pwait(args[..6]); diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 70c15844a..bd3ba3e5e 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -80,6 +80,7 @@ mod open; mod pause; mod pipe; mod poll; +mod ppoll; mod prctl; mod pread64; mod preadv; diff --git a/kernel/src/syscall/poll.rs b/kernel/src/syscall/poll.rs index 8a11a4cf7..f2be8ba32 100644 --- a/kernel/src/syscall/poll.rs +++ b/kernel/src/syscall/poll.rs @@ -10,10 +10,38 @@ use crate::{ file_table::{FileDesc, FileTable}, }, prelude::*, - process::signal::Poller, + process::{signal::Poller, ResourceType}, }; -pub fn sys_poll(fds: Vaddr, nfds: u64, timeout: i32, ctx: &Context) -> Result { +pub fn sys_poll(fds: Vaddr, nfds: u32, timeout: i32, ctx: &Context) -> Result { + let timeout = if timeout >= 0 { + Some(Duration::from_millis(timeout as _)) + } else { + None + }; + + do_sys_poll(fds, nfds, timeout, ctx) +} + +pub fn do_sys_poll( + fds: Vaddr, + nfds: u32, + timeout: Option, + ctx: &Context, +) -> Result { + if nfds as u64 + > ctx + .process + .resource_limits() + .get_rlimit(ResourceType::RLIMIT_NOFILE) + .get_cur() + { + return_errno_with_message!( + Errno::EINVAL, + "the `nfds` value exceeds the `RLIMIT_NOFILE` value" + ) + } + let user_space = ctx.user_space(); let poll_fds = { @@ -33,12 +61,6 @@ pub fn sys_poll(fds: Vaddr, nfds: u64, timeout: i32, ctx: &Context) -> Result= 0 { - Some(Duration::from_millis(timeout as _)) - } else { - None - }; - debug!( "poll_fds = {:?}, nfds = {}, timeout = {:?}", poll_fds, nfds, timeout diff --git a/kernel/src/syscall/ppoll.rs b/kernel/src/syscall/ppoll.rs new file mode 100644 index 000000000..ed062791e --- /dev/null +++ b/kernel/src/syscall/ppoll.rs @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MPL-2.0 + +use core::time::Duration; + +use super::{poll::do_sys_poll, SyscallReturn}; +use crate::{ + prelude::*, + process::signal::{sig_mask::SigMask, with_sigmask_changed}, + time::timespec_t, +}; + +pub fn sys_ppoll( + fds: Vaddr, + nfds: u32, + timespec_addr: Vaddr, + sigmask_addr: Vaddr, + sigmask_size: usize, + ctx: &Context, +) -> Result { + let user_space = ctx.user_space(); + + let timeout = if timespec_addr != 0 { + let time_spec = user_space.read_val::(timespec_addr)?; + Some(Duration::try_from(time_spec)?) + } else { + None + }; + + if sigmask_addr != 0 { + if sigmask_size != size_of::() { + return_errno_with_message!(Errno::EINVAL, "invalid sigmask size"); + } + + let sigmask = user_space.read_val::(sigmask_addr)?; + with_sigmask_changed(ctx, |_| sigmask, || do_sys_poll(fds, nfds, timeout, ctx)) + } else { + do_sys_poll(fds, nfds, timeout, ctx) + } + + // TODO: Write back the remaining time to `timespec_addr`. + // + // The ppoll system call should write back the remaining time, + // yet the function counterpart in libc hides this behavior to + // make the API more portable across different UNIX-like OSes. + // For the maximized Linux compatibility, we should follow Linux's behavior. + // But this cannot be readily achieved given how our internal synchronization primitives + // such as `Pause` and `WaitTimeout` work. +} diff --git a/kernel/src/syscall/pselect6.rs b/kernel/src/syscall/pselect6.rs index 9bc6a736b..cd78dc453 100644 --- a/kernel/src/syscall/pselect6.rs +++ b/kernel/src/syscall/pselect6.rs @@ -1,10 +1,13 @@ // SPDX-License-Identifier: MPL-2.0 -use core::{sync::atomic::Ordering, time::Duration}; +use core::time::Duration; use super::{select::do_sys_select, SyscallReturn}; use crate::{ - fs::file_table::FileDesc, prelude::*, process::signal::sig_mask::SigMask, time::timespec_t, + fs::file_table::FileDesc, + prelude::*, + process::signal::{sig_mask::SigMask, with_sigmask_changed}, + time::timespec_t, }; pub fn sys_pselect6( @@ -17,45 +20,34 @@ pub fn sys_pselect6( ctx: &Context, ) -> Result { let user_space = ctx.user_space(); - let old_simask = if sigmask_addr != 0 { - let sigmask_with_size: SigMaskWithSize = user_space.read_val(sigmask_addr)?; - - if !sigmask_with_size.is_valid() { - return_errno_with_message!(Errno::EINVAL, "sigmask size is invalid") - } - let old_sigmask = ctx - .posix_thread - .sig_mask() - .swap(sigmask_with_size.sigmask, Ordering::Relaxed); - - Some(old_sigmask) - } else { - None - }; let timeout = if timespec_addr != 0 { - let time_spec: timespec_t = user_space.read_val(timespec_addr)?; + let time_spec = user_space.read_val::(timespec_addr)?; Some(Duration::try_from(time_spec)?) } else { None }; - let res = do_sys_select( - nfds, - readfds_addr, - writefds_addr, - exceptfds_addr, - timeout, - ctx, - ); + let operate = || { + do_sys_select( + nfds, + readfds_addr, + writefds_addr, + exceptfds_addr, + timeout, + ctx, + ) + }; - if let Some(old_mask) = old_simask { - ctx.posix_thread - .sig_mask() - .store(old_mask, Ordering::Relaxed); + if sigmask_addr != 0 { + let sigmask_with_size = user_space.read_val::(sigmask_addr)?; + if !sigmask_with_size.is_valid() { + return_errno_with_message!(Errno::EINVAL, "sigmask size is invalid") + } + with_sigmask_changed(ctx, |_: SigMask| sigmask_with_size.sigmask, operate) + } else { + operate() } - - res } #[repr(C)] diff --git a/kernel/src/syscall/rt_sigsuspend.rs b/kernel/src/syscall/rt_sigsuspend.rs index ff7361ea5..e62caddcf 100644 --- a/kernel/src/syscall/rt_sigsuspend.rs +++ b/kernel/src/syscall/rt_sigsuspend.rs @@ -8,7 +8,7 @@ use crate::{ process::signal::{ constants::{SIGKILL, SIGSTOP}, sig_mask::SigMask, - with_signal_blocked, + with_sigmask_changed, }, }; @@ -37,7 +37,11 @@ pub fn sys_rt_sigsuspend( // Wait until receiving any signal let waiter = Waiter::new_pair().0; - with_signal_blocked(ctx, sigmask, || waiter.pause_until(|| None::<()>))?; + with_sigmask_changed( + ctx, + |old_mask| old_mask + sigmask, + || waiter.pause_until(|| None::<()>), + )?; // This syscall should always return `Err(EINTR)`. This path should never be reached. unreachable!("rt_sigsuspend always return EINTR"); diff --git a/test/syscall_test/Makefile b/test/syscall_test/Makefile index edf451060..7f48f1c37 100644 --- a/test/syscall_test/Makefile +++ b/test/syscall_test/Makefile @@ -27,6 +27,7 @@ TESTS ?= \ mount_test \ open_create_test \ open_test \ + ppoll_test \ prctl_setuid_test \ pread64_test \ preadv2_test \ diff --git a/test/syscall_test/blocklists/ppoll_test b/test/syscall_test/blocklists/ppoll_test new file mode 100644 index 000000000..482c6b22f --- /dev/null +++ b/test/syscall_test/blocklists/ppoll_test @@ -0,0 +1,3 @@ +PpollTest.NullFds +PpollTest.SignalMaskBlocksSignal +PpollTest.SignalMaskAllowsSignal \ No newline at end of file