asterinas/kernel/src/fs/epoll/epoll_file.rs

654 lines
21 KiB
Rust

// SPDX-License-Identifier: MPL-2.0
use core::{
borrow::Borrow,
sync::atomic::{AtomicBool, Ordering},
time::Duration,
};
use keyable_arc::{KeyableArc, KeyableWeak};
use ostd::sync::LocalIrqDisabled;
use super::*;
use crate::{
events::Observer,
fs::{
file_handle::FileLike,
utils::{InodeMode, IoctlCmd, Metadata},
},
process::signal::{PollHandle, Pollable, Pollee},
};
/// A file-like object that provides epoll API.
///
/// Conceptually, we maintain two lists: one consists of all interesting files,
/// which can be managed by the epoll ctl commands; the other is for ready files,
/// which are files that have some events. An epoll wait only needs to iterate the
/// ready list and poll each file to see if the file is ready for the interesting
/// I/O.
///
/// To maintain the ready list, we need to monitor interesting events that happen
/// on the files. To do so, every `EpollEntry` of the `EpollFile` is registered as an
/// `Observer` of its monitored file. Thus, we can add an entry to the ready list when
/// an interesting event happens on the file.
pub struct EpollFile {
// All interesting entries.
interest: Mutex<BTreeSet<EpollEntryHolder>>,
// Entries that are probably ready (having events happened).
// Only weak references are stored, so an entry removed from `interest`
// simply fails to upgrade when the ready list is scanned.
ready: SpinLock<VecDeque<Weak<EpollEntry>>, LocalIrqDisabled>,
// A guard to ensure that ready entries can be popped by one thread at a time.
pop_guard: Mutex<PopGuard>,
// EpollFile itself is also pollable
pollee: Pollee,
// Any EpollFile is wrapped with Arc when created.
weak_self: Weak<Self>,
}
/// A zero-sized marker protected by `EpollFile::pop_guard`.
///
/// A guard of the mutex that wraps this type is passed to `pop_one_ready` as
/// evidence that the caller is the only thread popping from the ready list.
struct PopGuard;
impl EpollFile {
/// Creates a new epoll file.
pub fn new() -> Arc<Self> {
Arc::new_cyclic(|me| Self {
interest: Mutex::new(BTreeSet::new()),
ready: SpinLock::new(VecDeque::new()),
pop_guard: Mutex::new(PopGuard),
pollee: Pollee::new(IoEvents::empty()),
weak_self: me.clone(),
})
}
/// Control the interest list of the epoll file.
pub fn control(&self, cmd: &EpollCtl) -> Result<()> {
let fd = match cmd {
EpollCtl::Add(fd, ..) => *fd,
EpollCtl::Del(fd) => *fd,
EpollCtl::Mod(fd, ..) => *fd,
};
let file = {
let current = current!();
let file_table = current.file_table().lock();
file_table.get_file(fd)?.clone()
};
match *cmd {
EpollCtl::Add(fd, ep_event, ep_flags) => {
self.add_interest(fd, file, ep_event, ep_flags)
}
EpollCtl::Del(fd) => self.del_interest(fd, Arc::downgrade(&file).into()),
EpollCtl::Mod(fd, ep_event, ep_flags) => {
self.mod_interest(fd, file, ep_event, ep_flags)
}
}
}
fn add_interest(
&self,
fd: FileDesc,
file: Arc<dyn FileLike>,
ep_event: EpollEvent,
ep_flags: EpollFlags,
) -> Result<()> {
self.warn_unsupported_flags(&ep_flags);
// Add the new entry to the interest list and start monitoring its events
let ready_entry = {
let mut interest = self.interest.lock();
if interest.contains(&EpollEntryKey::from((fd, &file))) {
return_errno_with_message!(
Errno::EEXIST,
"the file is already in the interest list"
);
}
let entry = EpollEntry::new(fd, Arc::downgrade(&file).into(), self.weak_self.clone());
let events = entry.update(ep_event, ep_flags)?;
let ready_entry = if !events.is_empty() {
Some(entry.clone())
} else {
None
};
let inserted = interest.insert(entry.into());
assert!(inserted);
ready_entry
};
// Add the new entry to the ready list if the file is ready
if let Some(entry) = ready_entry {
self.push_ready(entry);
}
Ok(())
}
/// Removes `(fd, file)` from the interest list.
///
/// Fails with `ENOENT` if no such entry is registered.
fn del_interest(&self, fd: FileDesc, file: KeyableWeak<dyn FileLike>) -> Result<()> {
    // The entry may still be referenced from the ready list, and removing it
    // from there would cost O(N).
    //
    // Instead, the ready list is cleaned up lazily when it is scanned in
    // `EpollFile::wait`: once the entry is removed here its strong reference
    // count drops to zero, so `Weak::upgrade` fails for the stale reference.
    let key = EpollEntryKey::from((fd, file));
    let removed = self.interest.lock().remove(&key);

    if !removed {
        return_errno_with_message!(Errno::ENOENT, "the file is not in the interest list");
    }

    Ok(())
}
fn mod_interest(
&self,
fd: FileDesc,
file: Arc<dyn FileLike>,
new_ep_event: EpollEvent,
new_ep_flags: EpollFlags,
) -> Result<()> {
self.warn_unsupported_flags(&new_ep_flags);
// Update the epoll entry
let ready_entry = {
let interest = self.interest.lock();
let EpollEntryHolder(entry) = interest
.get(&EpollEntryKey::from((fd, &file)))
.ok_or_else(|| {
Error::with_message(Errno::ENOENT, "the file is not in the interest list")
})?;
let events = entry.update(new_ep_event, new_ep_flags)?;
if !events.is_empty() {
Some(entry.clone())
} else {
None
}
};
// Add the updated entry to the ready list if the file is ready
if let Some(entry) = ready_entry {
self.push_ready(entry);
}
Ok(())
}
/// Wait for interesting events happen on the files in the interest list
/// of the epoll file.
///
/// This method blocks until either some interesting events happen or
/// the timeout expires or a signal arrives. The first case returns
/// `Ok(events)`, where `events` is a `Vec` containing at most `max_events`
/// number of `EpollEvent`s. The second and third case returns errors.
///
/// When `max_events` equals to zero, the method returns when the timeout
/// expires or a signal arrives.
pub fn wait(&self, max_events: usize, timeout: Option<&Duration>) -> Result<Vec<EpollEvent>> {
let mut ep_events = Vec::new();
self.wait_events(IoEvents::IN, timeout, || {
self.pop_multi_ready(max_events, &mut ep_events);
if ep_events.is_empty() {
return Err(Error::with_message(
Errno::EAGAIN,
"there are no available events",
));
}
Ok(())
})?;
Ok(ep_events)
}
/// Marks `entry` as ready and notifies the pollers of this epoll file.
///
/// Disabled entries are ignored. Otherwise the entry is appended to the ready
/// list unless it is already flagged as being there, and `IoEvents::IN` is
/// added to the epoll file's own pollee in either case.
fn push_ready(&self, entry: Arc<EpollEntry>) {
// Note that we cannot take the `EpollEntryInner` lock because we are in the callback of
// the event observer. Doing so will cause deadlocks due to inconsistent locking orders.
//
// We don't need to take the lock because
// - We always call `file.poll()` immediately after calling `self.set_enabled()` and
// `file.register_observer()`, so all events are caught either here or by the immediate
// poll; in other words, we don't lose any events.
// - Catching spurious events here is always fine because we always check them later before
// returning events to the user (in `EpollEntry::poll`).
if !entry.is_enabled() {
return;
}
// The ready-list guard lives until the end of the function, so the flag check, the push and
// the `add_events` call below all happen while the ready list is locked.
let mut ready = self.ready.lock();
if !entry.is_ready() {
entry.set_ready(&ready);
ready.push_back(Arc::downgrade(&entry));
}
// Even if the entry is already set to ready,
// there might be new events that we are interested in.
// Wake the poller anyway.
self.pollee.add_events(IoEvents::IN);
}
/// Pops ready entries, polls them, and appends the resulting events to
/// `ep_events` until it holds `max_events` events or every entry that was in
/// the ready list at the start has been visited.
///
/// Entries whose files are dead are removed from the interest list; entries
/// that `EpollEntry::poll` reports as still ready are pushed back.
fn pop_multi_ready(&self, max_events: usize, ep_events: &mut Vec<EpollEvent>) {
    let pop_guard = self.pop_guard.lock();
    let mut remaining = None;

    while ep_events.len() < max_events {
        // Since we're holding `pop_guard`, nobody else can pop entries from the ready list.
        // Hence `pop_one_ready` pops exactly the entries that were present when this method
        // started, and no entry is reported twice.
        let Some((entry, left)) = self.pop_one_ready(remaining, &pop_guard) else {
            break;
        };
        remaining = Some(left);

        match entry.poll() {
            None => {
                // The file is dead, so drop its entry. This can only fail if the user removes
                // the entry concurrently. A wrong entry can never be removed, so the error is
                // deliberately ignored.
                let _ = self.del_interest(entry.fd(), entry.file_weak().clone());
            }
            Some((ep_event, is_still_ready)) => {
                // Record the event, if there is one.
                ep_events.extend(ep_event);

                // Put the entry back to the ready list, if necessary.
                if is_still_ready {
                    self.push_ready(entry);
                }
            }
        }
    }
}
/// Pops the next live entry from the ready list.
///
/// `limit` bounds how many list slots may still be consumed; `None` means
/// "as many as the ready list currently holds". Returns the popped entry
/// together with the remaining limit, or `None` once the limit is exhausted.
/// Stale weak references (entries that were deleted) are skipped but still
/// count against the limit.
fn pop_one_ready(
    &self,
    limit: Option<usize>,
    _guard: &MutexGuard<PopGuard>,
) -> Option<(Arc<EpollEntry>, usize)> {
    // Nothing left to pop; do not even take the ready-list lock.
    if limit == Some(0) {
        return None;
    }

    let mut ready = self.ready.lock();
    let mut remaining = match limit {
        Some(count) => count,
        None => ready.len(),
    };

    while remaining > 0 {
        remaining -= 1;

        // `_guard` and the limit guarantee that the front entry exists, so unwrapping is fine.
        let weak_entry = ready.pop_front().unwrap();

        // Clear the epoll file's events once no ready entries are left.
        if ready.is_empty() {
            self.pollee.del_events(IoEvents::IN);
        }

        // A failed upgrade means the entry has been deleted; skip it.
        if let Some(entry) = weak_entry.upgrade() {
            // Mark the entry as not ready. `push_ready` can be invoked later to add it back
            // to the ready list if needed.
            entry.reset_ready(&ready);
            return Some((entry, remaining));
        }
    }

    None
}
/// Logs a warning if `flags` contains `EXCLUSIVE` or `WAKE_UP`, which are
/// reported as unsupported.
fn warn_unsupported_flags(&self, flags: &EpollFlags) {
    let unsupported = EpollFlags::EXCLUSIVE | EpollFlags::WAKE_UP;
    if flags.intersects(unsupported) {
        warn!("{:?} contains unsupported flags", flags);
    }
}
}
/// Polling an epoll file is delegated to its internal `Pollee`.
impl Pollable for EpollFile {
// Forwards `mask` and the optional `poller` to `self.pollee` and returns its result.
fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents {
self.pollee.poll(mask, poller)
}
}
/// `FileLike` implementation for epoll files.
///
/// `read`, `write` and `ioctl` are rejected with `EINVAL`; observer
/// (un)registration is forwarded to the internal `Pollee`.
impl FileLike for EpollFile {
// Always fails with `EINVAL`.
fn read(&self, _writer: &mut VmWriter) -> Result<usize> {
return_errno_with_message!(Errno::EINVAL, "epoll files do not support read");
}
// Always fails with `EINVAL`.
fn write(&self, _reader: &mut VmReader) -> Result<usize> {
return_errno_with_message!(Errno::EINVAL, "epoll files do not support write");
}
// Always fails with `EINVAL`.
fn ioctl(&self, _cmd: IoctlCmd, _arg: usize) -> Result<i32> {
return_errno_with_message!(Errno::EINVAL, "epoll files do not support ioctl");
}
// Registers `observer` with the given `mask` on the internal pollee; always returns `Ok(())`.
fn register_observer(
&self,
observer: Weak<dyn Observer<IoEvents>>,
mask: IoEvents,
) -> Result<()> {
self.pollee.register_observer(observer, mask);
Ok(())
}
// Unregisters `observer` from the internal pollee and returns whatever the pollee returns.
fn unregister_observer(
&self,
observer: &Weak<dyn Observer<IoEvents>>,
) -> Option<Weak<dyn Observer<IoEvents>>> {
self.pollee.unregister_observer(observer)
}
fn metadata(&self) -> Metadata {
// This is a dummy implementation.
// TODO: Add "anonymous inode fs" and link `EpollFile` to it.
Metadata::new_file(
0,
InodeMode::from_bits_truncate(0o600),
aster_block::BLOCK_SIZE,
)
}
}
/// An epoll entry that is contained in an epoll file.
///
/// Each epoll entry can be added, modified, or deleted by the `EpollCtl` command.
///
/// The entry only holds weak references to its file (inside `key`) and to its
/// epoll file, so it keeps neither of them alive.
pub struct EpollEntry {
// The file descriptor and the file
key: EpollEntryKey,
// The event masks and flags
inner: Mutex<EpollEntryInner>,
// Whether the entry is enabled. Set by `update`; cleared by `shutdown` and by `poll` when a
// one-shot entry reports events.
is_enabled: AtomicBool,
// Whether the entry is in the ready list. Only changed through `set_ready`/`reset_ready`,
// which require a guard of the ready list.
is_ready: AtomicBool,
// The epoll file that contains this epoll entry
weak_epoll: Weak<EpollFile>,
// The epoll entry itself (always inside an `Arc`)
weak_self: Weak<Self>,
}
/// The key that identifies an epoll entry within an interest list: a file
/// descriptor paired with a weak reference to the file.
///
/// The derived ordering compares `fd` first and `file` second.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
struct EpollEntryKey {
// The file descriptor under which the file was registered.
fd: FileDesc,
// A weak, comparable reference to the registered file.
file: KeyableWeak<dyn FileLike>,
}
/// Builds a key from a file descriptor and an already-weak file reference.
impl From<(FileDesc, KeyableWeak<dyn FileLike>)> for EpollEntryKey {
    fn from((fd, file): (FileDesc, KeyableWeak<dyn FileLike>)) -> Self {
        Self { fd, file }
    }
}
/// Builds a key from a file descriptor and a strong file reference.
///
/// The strong reference is only downgraded, so the key does not keep the
/// file alive.
impl From<(FileDesc, &Arc<dyn FileLike>)> for EpollEntryKey {
    fn from((fd, file): (FileDesc, &Arc<dyn FileLike>)) -> Self {
        Self {
            fd,
            file: Arc::downgrade(file).into(),
        }
    }
}
/// The mutable part of an epoll entry, protected by `EpollEntry::inner`.
struct EpollEntryInner {
// The interesting event mask together with the user data that is handed back with reported
// events.
event: EpollEvent,
// The epoll flags that are consulted by `EpollEntry::poll` (edge-triggered, one-shot).
flags: EpollFlags,
}
/// The default state has an empty event mask, zero user data, and no flags.
impl Default for EpollEntryInner {
    fn default() -> Self {
        let event = EpollEvent {
            events: IoEvents::empty(),
            user_data: 0,
        };
        let flags = EpollFlags::empty();

        Self { event, flags }
    }
}
impl EpollEntry {
/// Creates a new epoll entry associated with the given epoll file.
pub fn new(
fd: FileDesc,
file: KeyableWeak<dyn FileLike>,
weak_epoll: Weak<EpollFile>,
) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
key: EpollEntryKey { fd, file },
inner: Mutex::new(EpollEntryInner::default()),
is_enabled: AtomicBool::new(false),
is_ready: AtomicBool::new(false),
weak_epoll,
weak_self: me.clone(),
})
}
/// Gets the epoll file associated with this epoll entry.
///
/// Returns `None` if the weak reference to the epoll file can no longer be upgraded.
pub fn epoll_file(&self) -> Option<Arc<EpollFile>> {
self.weak_epoll.upgrade()
}
/// Gets an instance of `Arc` that refers to this epoll entry.
///
/// # Panics
///
/// Panics if the entry's weak self-reference cannot be upgraded.
pub fn self_arc(&self) -> Arc<Self> {
self.weak_self.upgrade().unwrap()
}
/// Gets an instance of `Weak` that refers to this epoll entry.
///
/// The returned value is a clone of the entry's stored self-reference.
pub fn self_weak(&self) -> Weak<Self> {
self.weak_self.clone()
}
/// Get the file associated with this epoll entry.
///
/// Since an epoll entry only holds a weak reference to the file,
/// it is possible (albeit unlikely) that the file has been dropped.
pub fn file(&self) -> Option<Arc<dyn FileLike>> {
self.key.file.upgrade().map(KeyableArc::into)
}
/// Polls the events of the file associated with this epoll entry.
///
/// Returns `None` if the file is dead. Otherwise returns the epoll event (if
/// any) plus a flag telling whether the entry should stay in the ready list
/// (`true`) or leave it (`false`).
pub fn poll(&self) -> Option<(Option<EpollEvent>, bool)> {
    let file = self.file()?;
    let inner = self.inner.lock();

    // A disabled entry never reports events.
    if !self.is_enabled() {
        return Some((None, false));
    }

    // Without events there is nothing to report and nothing to keep in the ready list.
    let io_events = file.poll(inner.event.events, None);
    if io_events.is_empty() {
        return Some((None, false));
    }

    let ep_event = EpollEvent::new(io_events, inner.event.user_data);

    // The entry stays in the ready list only if it is neither edge-triggered nor one-shot.
    let is_still_ready = !inner
        .flags
        .intersects(EpollFlags::EDGE_TRIGGER | EpollFlags::ONE_SHOT);

    // A one-shot entry is disabled after reporting events, until the user re-enables it via
    // `EpollCtl::Mod`.
    if inner.flags.contains(EpollFlags::ONE_SHOT) {
        self.reset_enabled(&inner);
    }

    Some((Some(ep_event), is_still_ready))
}
/// Updates the epoll entry by the given event masks and flags.
///
/// This method needs to be called in response to `EpollCtl::Add` and `EpollCtl::Mod`.
///
/// On success, the entry is enabled and the returned value is the result of polling the file
/// with the new event mask.
///
/// # Errors
///
/// Propagates the error of `FileLike::register_observer`. In that case the stored event masks,
/// flags and enabled state are left untouched.
///
/// # Panics
///
/// Panics if the file associated with this entry has already been dropped.
pub fn update(&self, event: EpollEvent, flags: EpollFlags) -> Result<IoEvents> {
let file = self.file().unwrap();
let mut inner = self.inner.lock();
// Register the observer first, so that a failure leaves the entry unchanged.
file.register_observer(self.self_weak(), event.events)?;
*inner = EpollEntryInner { event, flags };
self.set_enabled(&inner);
// Poll right after enabling and registering, so that events raised before the registration
// are reported to the caller as well.
let events = file.poll(event.events, None);
Ok(events)
}
/// Shuts down the epoll entry.
///
/// This method needs to be called in response to `EpollCtl::Del`.
///
/// If the file is still alive, the entry is unregistered from the file's observers. The entry
/// is marked as disabled in any case.
///
/// # Panics
///
/// Panics if the file is alive but `unregister_observer` returns `None` for this entry.
pub fn shutdown(&self) {
let inner = self.inner.lock();
if let Some(file) = self.file() {
file.unregister_observer(&(self.self_weak() as _)).unwrap();
};
self.reset_enabled(&inner);
}
/// Returns whether the epoll entry is in the ready list.
///
/// The flag is read with `Ordering::Relaxed`.
///
/// *Caution:* If this method is called without holding the lock of the ready list, the user
/// must ensure that the behavior is desired with respect to the way the ready list might be
/// modified concurrently.
pub fn is_ready(&self) -> bool {
self.is_ready.load(Ordering::Relaxed)
}
/// Marks the epoll entry as being in the ready list.
///
/// This method must be called while holding the lock of the ready list. This is the only way
/// to ensure that the "is ready" state matches the fact that the entry is actually in the
/// ready list. The `_guard` parameter is otherwise unused; it only serves as evidence that
/// the caller holds that lock.
pub fn set_ready(&self, _guard: &SpinLockGuard<VecDeque<Weak<EpollEntry>>, LocalIrqDisabled>) {
self.is_ready.store(true, Ordering::Relaxed);
}
/// Marks the epoll entry as not being in the ready list.
///
/// This method must be called while holding the lock of the ready list. This is the only way
/// to ensure that the "is ready" state matches the fact that the entry is actually in the
/// ready list. The `_guard` parameter is otherwise unused; it only serves as evidence that
/// the caller holds that lock.
pub fn reset_ready(
&self,
_guard: &SpinLockGuard<VecDeque<Weak<EpollEntry>>, LocalIrqDisabled>,
) {
self.is_ready.store(false, Ordering::Relaxed)
}
/// Returns whether the epoll entry is enabled.
///
/// The flag is read with `Ordering::Relaxed`.
///
/// *Caution:* If this method is called without holding the lock of the event masks and flags,
/// the user must ensure that the behavior is desired with respect to the way the event masks
/// and flags might be modified concurrently.
pub fn is_enabled(&self) -> bool {
self.is_enabled.load(Ordering::Relaxed)
}
/// Marks the epoll entry as enabled.
///
/// This method must be called while holding the lock of the event masks and flags. This is the
/// only way to ensure that the "is enabled" state describes the correct combination of the
/// event masks and flags. The `_guard` parameter is otherwise unused; it only serves as
/// evidence that the caller holds that lock.
fn set_enabled(&self, _guard: &MutexGuard<EpollEntryInner>) {
self.is_enabled.store(true, Ordering::Relaxed)
}
/// Marks the epoll entry as not enabled.
///
/// This method must be called while holding the lock of the event masks and flags. This is the
/// only way to ensure that the "is enabled" state describes the correct combination of the
/// event masks and flags. The `_guard` parameter is otherwise unused; it only serves as
/// evidence that the caller holds that lock.
fn reset_enabled(&self, _guard: &MutexGuard<EpollEntryInner>) {
self.is_enabled.store(false, Ordering::Relaxed)
}
/// Gets the file descriptor associated with the epoll entry.
///
/// This is the descriptor stored in the entry's key.
pub fn fd(&self) -> FileDesc {
self.key.fd
}
/// Gets the weak reference to the file associated with this epoll entry.
///
/// Unlike `file`, this does not try to upgrade the reference, so it also works after the file
/// has been dropped.
pub fn file_weak(&self) -> &KeyableWeak<dyn FileLike> {
&self.key.file
}
}
/// An epoll entry observes its file and reports itself as ready on events.
impl Observer<IoEvents> for EpollEntry {
    /// Pushes this entry to the ready list of its epoll file.
    ///
    /// Does nothing if the epoll file is no longer alive.
    fn on_events(&self, _events: &IoEvents) {
        let Some(epoll_file) = self.epoll_file() else {
            return;
        };
        epoll_file.push_ready(self.self_arc());
    }
}
/// The owner of an `EpollEntry` inside the interest list.
///
/// The holder is compared and ordered by the entry's key, and shuts the entry
/// down when it is dropped.
struct EpollEntryHolder(pub Arc<EpollEntry>);
/// The partial order is the total order defined by the `Ord` implementation.
impl PartialOrd for EpollEntryHolder {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
Some(self.cmp(other))
}
}
/// Holders are ordered by the keys of the entries they own.
impl Ord for EpollEntryHolder {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        Ord::cmp(&self.0.key, &other.0.key)
    }
}
/// Two holders are equal when the keys of their entries are equal.
impl PartialEq for EpollEntryHolder {
    fn eq(&self, other: &Self) -> bool {
        self.0.key == other.0.key
    }
}
// Marker impl: holder equality is the equality of the entry keys (see `PartialEq`).
impl Eq for EpollEntryHolder {}
/// Lets a holder be borrowed as the key of its entry, so that the interest
/// set can be queried with an `EpollEntryKey`.
impl Borrow<EpollEntryKey> for EpollEntryHolder {
fn borrow(&self) -> &EpollEntryKey {
&self.0.key
}
}
/// Wraps an entry into a holder without any other effect.
impl From<Arc<EpollEntry>> for EpollEntryHolder {
fn from(value: Arc<EpollEntry>) -> Self {
Self(value)
}
}
/// Dropping a holder shuts down the entry it owns.
impl Drop for EpollEntryHolder {
fn drop(&mut self) {
// `shutdown` unregisters the entry from its file (if the file is still alive) and marks the
// entry as disabled.
self.0.shutdown();
}
}