diff --git a/kernel/src/fs/notify/inotify.rs b/kernel/src/fs/notify/inotify.rs new file mode 100644 index 000000000..11e9f226a --- /dev/null +++ b/kernel/src/fs/notify/inotify.rs @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: MPL-2.0 + +use alloc::{ + collections::VecDeque, + string::String, + sync::{Arc, Weak}, +}; +use core::{ + any::Any, + fmt::Display, + sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}, +}; + +use bitflags::bitflags; +use hashbrown::HashMap; +use ostd::{ + mm::VmWriter, + sync::{Mutex, SpinLock}, +}; + +use crate::{ + current_userspace, + events::IoEvents, + fs::{ + file_handle::FileLike, + file_table::FdFlags, + notify::{FsEventSubscriber, FsEvents}, + path::{Path, RESERVED_MOUNT_ID}, + pseudofs::anon_inodefs_shared_inode, + utils::{AccessMode, CreationFlags, Inode, IoctlCmd, StatusFlags}, + }, + prelude::*, + process::signal::{PollHandle, Pollable, Pollee}, + return_errno_with_message, +}; + +#[derive(Clone)] +struct SubscriberEntry { + inode: Weak, + subscriber: Weak, +} + +/// A file-like object that provides inotify functionality. +/// +/// `InotifyFile` accepts events from multiple inotify subscribers (watches) on different inodes. +/// Users should read events from this file to receive notifications about filesystem changes. +pub struct InotifyFile { + // Lock to serialize watch updates and removals. + watch_lock: Mutex<()>, + // Next watch descriptor to allocate. + next_wd: AtomicU32, + // A map from watch descriptor to subscriber entry. + watch_map: RwLock>, + // Whether the file is opened in non-blocking mode. + is_nonblocking: AtomicBool, + // Queue of pending inotify events, bounded by `queue_capacity`. + event_queue: SpinLock>, + // Maximum number of events kept in `event_queue`; further events are dropped. + queue_capacity: usize, + // A pollable object for this inotify file. + pollee: Pollee, + // A weak self-reference to this inotify file, upgraded by `this()`. + this: Weak, +} + +impl Drop for InotifyFile { + /// Cleans up all subscribers when the inotify file is dropped. 
+ /// This will remove all subscribers from their inodes. + fn drop(&mut self) { + let mut watch_map = self.watch_map.write(); + for (_, entry) in watch_map.iter() { + let (Some(inode), Some(subscriber)) = + (entry.inode.upgrade(), entry.subscriber.upgrade()) + else { + continue; + }; + + if inode.fs_event_publisher().remove_subscriber(&subscriber) { + inode.fs().fs_event_subscriber_stats().remove_subscriber(); + } + } + watch_map.clear(); + } +} + +/// Default max queued events. +/// +/// Reference: +const DEFAULT_MAX_QUEUED_EVENTS: usize = 16384; + +impl InotifyFile { + /// Creates a new inotify file. + /// + /// Watch descriptors start from 1. + /// Reference: + pub fn new(is_nonblocking: bool) -> Result> { + let event_queue = VecDeque::try_with_capacity(DEFAULT_MAX_QUEUED_EVENTS).map_err(|_| { + Error::with_message(Errno::ENOMEM, "Insufficient kernel memory is available") + })?; + + Ok(Arc::new_cyclic(|weak_self| Self { + watch_lock: Mutex::new(()), + next_wd: AtomicU32::new(1), + watch_map: RwLock::new(HashMap::new()), + is_nonblocking: AtomicBool::new(is_nonblocking), + event_queue: SpinLock::new(event_queue), + queue_capacity: DEFAULT_MAX_QUEUED_EVENTS, + pollee: Pollee::new(), + this: weak_self.clone(), + })) + } + + /// Allocates a new watch descriptor, or fails with `ENOSPC` once the counter passes `i32::MAX`. + fn alloc_wd(&self) -> Result { + const MAX_VALID_WD: u32 = i32::MAX as u32; + + let new_wd = self.next_wd.fetch_add(1, Ordering::Relaxed); + if new_wd > MAX_VALID_WD { + // Roll back the allocation if we exceeded the limit. + self.next_wd.fetch_sub(1, Ordering::Relaxed); + return_errno_with_message!(Errno::ENOSPC, "Inotify watches limit reached"); + } + Ok(new_wd) + } + + /// Adds or updates a watch on a path. + /// + /// If a watch on the path is not found, creates a new watch. + /// If a watch on the path is found, updates it. 
+ pub fn add_watch( + &self, + path: &Path, + interesting: InotifyEvents, + options: InotifyControls, + ) -> Result { + // Serialize updates so concurrent callers do not create duplicate watches. + let _guard = self.watch_lock.lock(); + + // Try to update an existing subscriber first. + match self.update_existing_subscriber(path, interesting, options) { + Ok(wd) => Ok(wd), + Err(e) if e.error() == Errno::ENOENT => { + // Subscriber not found; create a new one. + self.create_new_subscriber(path, interesting, options) + } + Err(e) => Err(e), + } + } + + /// Removes a watch by watch descriptor; fails with `EINVAL` if no live watch has that descriptor. + pub fn remove_watch(&self, wd: u32) -> Result<()> { + let _guard = self.watch_lock.lock(); + + let mut watch_map = self.watch_map.write(); + let Some(entry) = watch_map.remove(&wd) else { + return_errno_with_message!(Errno::EINVAL, "watch not found"); + }; + + // When concurrent removal happens, the weak refs may have already been dropped. + // Try to upgrade the weak refs; if either side is gone, treat as already removed. + let (inode, subscriber) = match (entry.inode.upgrade(), entry.subscriber.upgrade()) { + (Some(i), Some(s)) => (i, s), + _ => return_errno_with_message!(Errno::EINVAL, "watch not found"), + }; + + if inode.fs_event_publisher().remove_subscriber(&subscriber) { + inode.fs().fs_event_subscriber_stats().remove_subscriber(); + } + Ok(()) + } + + /// Updates the subscriber that this inotify file already has on the path's inode, or fails with `ENOENT` if there is none. + fn update_existing_subscriber( + &self, + path: &Path, + interesting: InotifyEvents, + options: InotifyControls, + ) -> Result { + let publisher = path.inode().fs_event_publisher(); + let inotify_file = self.this(); + + let result = publisher.find_subscriber_and_process(|subscriber| { + // Try to downcast to InotifySubscriber and check if it belongs to this InotifyFile. 
+ let inotify_subscriber = + (subscriber.as_ref() as &dyn Any).downcast_ref::()?; + + if Arc::ptr_eq(&inotify_subscriber.inotify_file(), &inotify_file) { + // Found the matching subscriber; perform the update in place. + Some(inotify_subscriber.update(interesting, options)) + } else { + None + } + }); + + if let Some(result) = result { + // Notify publisher to recalculate aggregated events after subscriber update. + publisher.update_subscriber_events(); + return result; + } + + // If the subscriber is not found, return ENOENT. + return_errno_with_message!(Errno::ENOENT, "watch not found"); + } + + /// Creates a new FS event subscriber, registers it with the inode's publisher, and records it in the watch map. + fn create_new_subscriber( + &self, + path: &Path, + interesting: InotifyEvents, + options: InotifyControls, + ) -> Result { + let inotify_subscriber = InotifySubscriber::new(self.this(), interesting, options)?; + let subscriber = inotify_subscriber.clone() as Arc; + + if path + .inode() + .fs_event_publisher() + .add_subscriber(subscriber.clone()) + { + path.inode() + .fs() + .fs_event_subscriber_stats() + .add_subscriber(); + } + + let wd = inotify_subscriber.wd(); + self.watch_map.write().insert( + wd, + SubscriberEntry { + inode: Arc::downgrade(path.inode()), + subscriber: Arc::downgrade(&subscriber), + }, + ); + + Ok(wd) + } + + /// Sends an inotify event to the inotify file. + /// The event will be queued and can be read by users. + /// If the event can be merged with the last event in the queue, it will be merged. + /// The event is only queued if it matches one of the subscriber's interesting events, except that `IN_IGNORED` is always accepted. 
+ fn receive_event(&self, subscriber: &InotifySubscriber, event: FsEvents, name: Option) { + let wd = subscriber.wd(); + if !event.contains(FsEvents::IN_IGNORED) && !subscriber.is_interesting(event) { + return; + } + + let new_event = InotifyEvent::new(wd, event, 0, name); + + { + let mut event_queue = self.event_queue.lock(); + if let Some(last_event) = event_queue.back() { + if can_merge_events(last_event, &new_event) { + event_queue.pop_back(); + event_queue.push_back(new_event); + // New or merged event makes the file readable. + self.pollee.notify(IoEvents::IN); + return; + } + } + + // If the queue is full, drop the event. + // We do not return an error to the caller. + if event_queue.len() >= self.queue_capacity { + return; + } + + event_queue.push_back(new_event); + } + self.pollee.notify(IoEvents::IN); + } + + /// Pops the oldest event from the notification queue, if any. + fn pop_event(&self) -> Option { + let mut event_queue = self.event_queue.lock(); + event_queue.pop_front() + } + + /// Gets the total size in bytes of all queued events, each counted as its header plus padded name. + fn get_all_event_size(&self) -> usize { + let event_queue = self.event_queue.lock(); + + event_queue.iter().map(|event| event.get_size()).sum() + } + + /// Tries to read queued events into `writer`, returning `EAGAIN` when the queue is empty. 
+ fn try_read(&self, writer: &mut VmWriter) -> Result { + const HEADER_SIZE: usize = core::mem::size_of::(); + if writer.avail() < HEADER_SIZE { // cannot hold even a nameless event + return_errno_with_message!(Errno::EINVAL, "buffer is too small"); + } + + let mut size = 0; + let mut consumed_events = 0; + + while let Some(event) = self.pop_event() { + match event.copy_to_user(writer) { + Ok(event_size) => { + size += event_size; + consumed_events += 1; + } + Err(e) => { // put the event back at the front so that it is not lost + self.event_queue.lock().push_front(event); + if consumed_events == 0 { + return Err(e); + } + break; + } + } + } + + if consumed_events == 0 { + return_errno_with_message!(Errno::EAGAIN, "no inotify events available"); + } + + // Only invalidate if the queue is empty after reading. + let queue_empty = self.event_queue.lock().is_empty(); + if queue_empty { + self.pollee.invalidate(); + } + Ok(size) + } + + fn this(&self) -> Arc { + self.this.upgrade().unwrap() + } +} + +impl Pollable for InotifyFile { + fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents { + self.pollee.poll_with(mask, poller, || { + if self.event_queue.lock().is_empty() { + IoEvents::empty() + } else { + IoEvents::IN + } + }) + } +} + +impl FileLike for InotifyFile { + fn read(&self, writer: &mut VmWriter) -> Result { + if self.is_nonblocking.load(Ordering::Relaxed) { + self.try_read(writer) + } else { + self.wait_events(IoEvents::IN, None, || self.try_read(writer)) + } + } + + fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result { + match cmd { + IoctlCmd::FIONREAD => { + let size = self.get_all_event_size().min(i32::MAX as usize) as i32; // FIONREAD stores a C `int`; clamp so the cast cannot wrap + let size_addr = arg; + current_userspace!().write_val(size_addr, &size)?; + Ok(0) + } + _ => return_errno_with_message!(Errno::EINVAL, "ioctl is not supported"), + } + } + + fn status_flags(&self) -> StatusFlags { + if self.is_nonblocking.load(Ordering::Relaxed) { + StatusFlags::O_NONBLOCK + } else { + StatusFlags::empty() + } + } + + fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> { + self.is_nonblocking.store( + 
new_flags.contains(StatusFlags::O_NONBLOCK), + Ordering::Relaxed, + ); + Ok(()) + } + + fn access_mode(&self) -> AccessMode { + AccessMode::O_RDONLY + } + + fn inode(&self) -> &Arc { + anon_inodefs_shared_inode() + } + + fn dump_proc_fdinfo(self: Arc, fd_flags: FdFlags) -> Box { + struct FdInfo { + inner: Arc, + fd_flags: FdFlags, + } + + impl Display for FdInfo { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut flags = self.inner.status_flags().bits() | self.inner.access_mode() as u32; + if self.fd_flags.contains(FdFlags::CLOEXEC) { + flags |= CreationFlags::O_CLOEXEC.bits(); + } + + writeln!(f, "pos:\t{}", 0)?; + writeln!(f, "flags:\t0{:o}", flags)?; + // TODO: This should be the mount ID of the pseudo filesystem. + writeln!(f, "mnt_id:\t{}", RESERVED_MOUNT_ID)?; + writeln!(f, "ino:\t{}", self.inner.inode().ino())?; + + for (wd, entry) in self.inner.watch_map.read().iter() { + let Some(inode) = entry.inode.upgrade() else { + continue; + }; + let Some(subscriber) = entry.subscriber.upgrade() else { + continue; + }; + let mask = subscriber.interesting_events().bits(); + let sdev = inode.fs().sb().fsid; + writeln!( + f, + "inotify wd:{} ino:{:x} sdev:{:x} mask:{:x} ignored_mask:0 fhandle-bytes:0 fhandle-type:0 f_handle:0", + wd, + inode.ino(), + sdev, + mask + )?; + } + + Ok(()) + } + } + + Box::new(FdInfo { + inner: self, + fd_flags, + }) + } +} + +/// Checks if the event contains a mergeable type (`MODIFY`, `ATTRIB`, or `ACCESS`). +fn is_mergeable_event_type(event: FsEvents) -> bool { + event & (FsEvents::MODIFY | FsEvents::ATTRIB | FsEvents::ACCESS) != FsEvents::empty() +} + +/// Checks if two inotify events can be merged: same watch, same name, and the same mergeable event mask. +fn can_merge_events(existing: &InotifyEvent, new_event: &InotifyEvent) -> bool { + existing.wd() == new_event.wd() + && existing.name == new_event.name + && existing.header.event == new_event.header.event + && is_mergeable_event_type(new_event.header.event) +} + +/// Represents a watch on a file or directory in the inotify system. 
+/// +/// In the inotify implementation, a watch is equivalent to a subscriber. The subscriber +/// specifies which events it wants to monitor using `InotifyEvents`, and control options +/// using `InotifyControls`. Both the event mask and control options are stored in a single +/// `AtomicU64` for atomic updates: the high 32 bits store options, and the low 32 bits +/// store the event mask. +pub struct InotifySubscriber { + // Interesting events (low 32 bits) and control options (high 32 bits). + interesting_and_controls: AtomicU64, + // Watch descriptor. + wd: u32, + // Reference to the owning inotify file. + inotify_file: Arc, +} + +impl InotifySubscriber { + /// Creates a new InotifySubscriber with initial interesting events and options. + /// The `interesting_and_controls` field is packed into a u64: the high 32 bits store options, + /// and the low 32 bits store interesting events. + pub fn new( + inotify_file: Arc, + interesting: InotifyEvents, + options: InotifyControls, + ) -> Result> { + let wd = inotify_file.alloc_wd()?; + let this = Arc::new(Self { + interesting_and_controls: AtomicU64::new(0), + wd, + inotify_file, + }); + // Initialize `interesting_and_controls` with the packed value. + this.update_interesting_and_controls(interesting.bits(), options.bits()); + Ok(this) + } + + pub fn wd(&self) -> u32 { + self.wd + } + + fn interesting(&self) -> InotifyEvents { + let flags = self.interesting_and_controls.load(Ordering::Relaxed); + InotifyEvents::from_bits_truncate((flags & 0xFFFFFFFF) as u32) + } + + fn options(&self) -> InotifyControls { + let flags = self.interesting_and_controls.load(Ordering::Relaxed); + InotifyControls::from_bits_truncate((flags >> 32) as u32) + } + + pub fn inotify_file(&self) -> Arc { + self.inotify_file.clone() + } + + /// Updates the interesting events and options of this watch and returns its descriptor; fails with `EEXIST` under `MASK_CREATE`. 
+ fn update(&self, interesting: InotifyEvents, options: InotifyControls) -> Result { + if options.contains(InotifyControls::MASK_CREATE) { + return_errno_with_message!(Errno::EEXIST, "watch already exists"); + } + + let mut merged_interesting = interesting; + let mut merged_options = options; + + if options.contains(InotifyControls::MASK_ADD) { + merged_interesting |= self.interesting(); + merged_options |= self.options(); + } + merged_options.remove(InotifyControls::MASK_ADD); + + self.update_interesting_and_controls(merged_interesting.bits(), merged_options.bits()); + Ok(self.wd()) + } + + /// Packs the options (high 32 bits) and events (low 32 bits) and publishes them with a single relaxed store; there is no CAS loop. + fn update_interesting_and_controls(&self, new_interesting: u32, new_options: u32) { + let new_flags = ((new_options as u64) << 32) | (new_interesting as u64); + self.interesting_and_controls + .store(new_flags, Ordering::Relaxed); + } + + /// Checks if the event shares at least one bit with the subscriber's interesting events. + fn is_interesting(&self, event: FsEvents) -> bool { + self.interesting().bits() & event.bits() != 0 + } +} + +impl FsEventSubscriber for InotifySubscriber { + /// Sends FS events to the inotify file. + fn deliver_event(&self, event: FsEvents, name: Option) { + let inotify_file = self.inotify_file(); + inotify_file.receive_event(self, event, name); + } + + /// Returns the events that this subscriber is interested in. + fn interesting_events(&self) -> FsEvents { + let inotify_events = self.interesting(); + FsEvents::from_bits_truncate(inotify_events.bits()) + } +} + +/// Represents an inotify event that can be read by users. +struct InotifyEvent { + header: InotifyEventHeader, + name: Option, +} + +/// The header of an inotify event. 
+/// +/// see +#[repr(C)] +struct InotifyEventHeader { + wd: u32, + event: FsEvents, + cookie: u32, + name_len: u32, +} + +impl InotifyEvent { + fn new(wd: u32, event: FsEvents, cookie: u32, name: Option) -> Self { + // Calculate actual name length including null terminator + let actual_name_len = name.as_ref().map_or(0, |name| name.len() + 1); + // Calculate padded name length aligned to sizeof(struct inotify_event) + let pad_name_len = Self::round_event_name_len(actual_name_len); + + Self { + header: InotifyEventHeader { + wd, + event, + cookie, + name_len: pad_name_len as u32, + }, + name, + } + } + + fn wd(&self) -> u32 { + self.header.wd + } + + fn event(&self) -> FsEvents { + self.header.event + } + + fn cookie(&self) -> u32 { + self.header.cookie + } + + fn name_len(&self) -> u32 { + self.header.name_len + } +} + +impl InotifyEvent { + /// Rounds the name length up to a multiple of `INOTIFY_EVENT_SIZE`; the bit mask assumes that size is a power of two. + fn round_event_name_len(name_len: usize) -> usize { + const INOTIFY_EVENT_SIZE: usize = core::mem::size_of::(); + (name_len + INOTIFY_EVENT_SIZE - 1) & !(INOTIFY_EVENT_SIZE - 1) + } + + fn copy_to_user(&self, writer: &mut VmWriter) -> Result { + let mut total_size = 0; + + // Calculate actual name length including null terminator + let actual_name_len = self.name.as_ref().map_or(0, |name| name.len() + 1); + // Calculate padded name length aligned to sizeof(struct inotify_event) + let pad_name_len = Self::round_event_name_len(actual_name_len); + + // Write the event header field by field: wd, event mask, cookie, padded name length. + writer.write_val(&self.wd())?; + writer.write_val(&self.event().bits())?; + writer.write_val(&self.cookie())?; + writer.write_val(&self.name_len())?; + total_size += core::mem::size_of::(); + + if let Some(name) = self.name.as_ref() { + // Write the actual name bytes + for byte in name.as_bytes() { + writer.write_val(byte)?; + } + // Write null terminator + writer.write_val(&b'\0')?; + total_size += name.len() + 1; + + // Fill remaining bytes with zeros for alignment + let 
padding_len = pad_name_len - actual_name_len; + if padding_len > 0 { + let filled = writer.fill_zeros(padding_len).map_err(|(e, _)| e)?; + total_size += filled; + } + } + + Ok(total_size) + } + + fn get_size(&self) -> usize { + const HEADER_SIZE: usize = core::mem::size_of::(); // 16 bytes + let actual_name_len = self.name.as_ref().map_or(0, |name| name.len() + 1); + let pad_name_len = Self::round_event_name_len(actual_name_len); + HEADER_SIZE + pad_name_len + } +} + +bitflags! { + /// `InotifyEvents` represents the set of events that a subscriber wants to monitor. + /// These events are used to filter notifications sent to the subscriber. + pub struct InotifyEvents: u32 { + const ACCESS = 1 << 0; // File was accessed + const MODIFY = 1 << 1; // File was modified + const ATTRIB = 1 << 2; // Metadata changed + const CLOSE_WRITE = 1 << 3; // Writable file was closed + const CLOSE_NOWRITE = 1 << 4; // Unwritable file was closed + const OPEN = 1 << 5; // File was opened + const MOVED_FROM = 1 << 6; // File was moved from X + const MOVED_TO = 1 << 7; // File was moved to Y + const CREATE = 1 << 8; // Subfile was created + const DELETE = 1 << 9; // Subfile was deleted + const DELETE_SELF = 1 << 10; // Self was deleted + const MOVE_SELF = 1 << 11; // Self was moved + const UNMOUNT = 1 << 13; // Backing fs was unmounted + const Q_OVERFLOW = 1 << 14; // Event queue overflowed + const IGNORED = 1 << 15; // File was ignored + const CLOSE = Self::CLOSE_WRITE.bits() | Self::CLOSE_NOWRITE.bits(); // Either close event + const MOVE = Self::MOVED_FROM.bits() | Self::MOVED_TO.bits(); // Either move event + const ALL_EVENTS = Self::ACCESS.bits() | Self::MODIFY.bits() | Self::ATTRIB.bits() | + Self::CLOSE_WRITE.bits() | Self::CLOSE_NOWRITE.bits() | Self::OPEN.bits() | + Self::MOVED_FROM.bits() | Self::MOVED_TO.bits() | Self::DELETE.bits() | + Self::CREATE.bits() | Self::DELETE_SELF.bits() | Self::MOVE_SELF.bits(); // The twelve file events; excludes UNMOUNT, Q_OVERFLOW and IGNORED + } +} + +bitflags! 
{ + pub struct InotifyControls: u32 { + const ONLYDIR = 1 << 24; // Only watch directories + const DONT_FOLLOW = 1 << 25; // Don't follow symlinks + const EXCL_UNLINK = 1 << 26; // Exclude events on unlinked objects + const MASK_CREATE = 1 << 28; // Only create watches; updating an existing watch fails with EEXIST + const MASK_ADD = 1 << 29; // OR the new events into the existing watch mask + const ISDIR = 1 << 30; // Event occurred on a directory + const ONESHOT = 1 << 31; // Send event once + } +} diff --git a/kernel/src/fs/notify/mod.rs b/kernel/src/fs/notify/mod.rs index a2caa0ec5..9af2b5ef9 100644 --- a/kernel/src/fs/notify/mod.rs +++ b/kernel/src/fs/notify/mod.rs @@ -15,6 +15,8 @@ use crate::{ prelude::*, }; +pub mod inotify; + use super::utils::{Inode, InodeType}; /// Publishes filesystem events to subscribers. @@ -228,17 +230,19 @@ define_atomic_version_of_integer_like_type!(FsEvents, { /// Notifies that a file was accessed. pub fn on_access(file: &Arc) { // TODO: Check fmode flags (FMODE_NONOTIFY, FMODE_NONOTIFY_PERM). - if let Some(path) = file.path() { - if !path - .inode() - .fs() - .fs_event_subscriber_stats() - .has_any_subscribers() - { - return; - } - notify_parent(path, FsEvents::ACCESS, path.effective_name()); + let Some(path) = file.path() else { + return; + }; + + if !path + .inode() + .fs() + .fs_event_subscriber_stats() + .has_any_subscribers() + { + return; } + notify_parent(path, FsEvents::ACCESS, path.effective_name()); } /// Notifies that a file was modified. @@ -356,17 +360,19 @@ pub fn on_create(file_path: &Path, name: String) { /// Notifies that a file was opened. pub fn on_open(file: &Arc) { // TODO: Check fmode flags (FMODE_NONOTIFY, FMODE_NONOTIFY_PERM). 
- if let Some(path) = file.path() { - if !path - .inode() - .fs() - .fs_event_subscriber_stats() - .has_any_subscribers() - { - return; - } - notify_parent(path, FsEvents::OPEN, path.effective_name()); + let Some(path) = file.path() else { + return; + }; + + if !path + .inode() + .fs() + .fs_event_subscriber_stats() + .has_any_subscribers() + { + return; } + notify_parent(path, FsEvents::OPEN, path.effective_name()); } /// Notifies that a file was closed. @@ -422,7 +428,7 @@ fn notify_parent(path: &Path, mut events: FsEvents, name: String) { /// Sends a filesystem notification event to all subscribers of an inode. /// -/// This is the main entry point for fsnotify. The VFS layer calls hook-specific +/// This is the main entry point for FS event notification. The VFS layer calls hook-specific /// functions in `fs/notify/`, which then call this function to broadcast events /// to all registered subscribers through the inode's publisher. fn notify_inode(inode: &Arc, events: FsEvents, name: Option) { diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index ae43cf8de..103d281bd 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -22,6 +22,7 @@ #![feature(trait_alias)] #![feature(trait_upcasting)] #![feature(associated_type_defaults)] +#![feature(try_with_capacity)] #![register_tool(component_access_control)] extern crate alloc; diff --git a/kernel/src/syscall/arch/loongarch.rs b/kernel/src/syscall/arch/loongarch.rs index 0040f3fd4..dd29cead2 100644 --- a/kernel/src/syscall/arch/loongarch.rs +++ b/kernel/src/syscall/arch/loongarch.rs @@ -54,6 +54,7 @@ use super::{ getuid::sys_getuid, getxattr::{sys_fgetxattr, sys_getxattr, sys_lgetxattr}, impl_syscall_nums_and_dispatch_fn, + inotify::{sys_inotify_add_watch, sys_inotify_init1, sys_inotify_rm_watch}, ioctl::sys_ioctl, kill::sys_kill, link::sys_linkat, @@ -181,6 +182,9 @@ impl_syscall_nums_and_dispatch_fn! 
{ SYS_DUP = 23 => sys_dup(args[..1]); SYS_DUP3 = 24 => sys_dup3(args[..3]); SYS_FCNTL = 25 => sys_fcntl(args[..3]); + SYS_INOTIFY_INIT1 = 26 => sys_inotify_init1(args[..1]); + SYS_INOTIFY_ADD_WATCH = 27 => sys_inotify_add_watch(args[..3]); + SYS_INOTIFY_RM_WATCH = 28 => sys_inotify_rm_watch(args[..2]); SYS_IOCTL = 29 => sys_ioctl(args[..3]); SYS_IOPRIO_SET = 30 => sys_ioprio_set(args[..3]); SYS_IOPRIO_GET = 31 => sys_ioprio_get(args[..2]); diff --git a/kernel/src/syscall/arch/riscv.rs b/kernel/src/syscall/arch/riscv.rs index 30153882b..027425f02 100644 --- a/kernel/src/syscall/arch/riscv.rs +++ b/kernel/src/syscall/arch/riscv.rs @@ -54,6 +54,7 @@ use super::{ getuid::sys_getuid, getxattr::{sys_fgetxattr, sys_getxattr, sys_lgetxattr}, impl_syscall_nums_and_dispatch_fn, + inotify::{sys_inotify_add_watch, sys_inotify_init1, sys_inotify_rm_watch}, ioctl::sys_ioctl, kill::sys_kill, link::sys_linkat, @@ -181,6 +182,9 @@ impl_syscall_nums_and_dispatch_fn! { SYS_DUP = 23 => sys_dup(args[..1]); SYS_DUP3 = 24 => sys_dup3(args[..3]); SYS_FCNTL = 25 => sys_fcntl(args[..3]); + SYS_INOTIFY_INIT1 = 26 => sys_inotify_init1(args[..1]); + SYS_INOTIFY_ADD_WATCH = 27 => sys_inotify_add_watch(args[..3]); + SYS_INOTIFY_RM_WATCH = 28 => sys_inotify_rm_watch(args[..2]); SYS_IOCTL = 29 => sys_ioctl(args[..3]); SYS_IOPRIO_SET = 30 => sys_ioprio_set(args[..3]); SYS_IOPRIO_GET = 31 => sys_ioprio_get(args[..2]); diff --git a/kernel/src/syscall/arch/x86.rs b/kernel/src/syscall/arch/x86.rs index 70111582b..a5c29b636 100644 --- a/kernel/src/syscall/arch/x86.rs +++ b/kernel/src/syscall/arch/x86.rs @@ -61,6 +61,7 @@ use super::{ getuid::sys_getuid, getxattr::{sys_fgetxattr, sys_getxattr, sys_lgetxattr}, impl_syscall_nums_and_dispatch_fn, + inotify::{sys_inotify_add_watch, sys_inotify_init, sys_inotify_init1, sys_inotify_rm_watch}, ioctl::sys_ioctl, kill::sys_kill, link::{sys_link, sys_linkat}, @@ -349,6 +350,9 @@ impl_syscall_nums_and_dispatch_fn! 
{ SYS_WAITID = 247 => sys_waitid(args[..5]); SYS_IOPRIO_SET = 251 => sys_ioprio_set(args[..3]); SYS_IOPRIO_GET = 252 => sys_ioprio_get(args[..2]); + SYS_INOTIFY_INIT = 253 => sys_inotify_init(args[..0]); + SYS_INOTIFY_ADD_WATCH = 254 => sys_inotify_add_watch(args[..3]); + SYS_INOTIFY_RM_WATCH = 255 => sys_inotify_rm_watch(args[..2]); SYS_OPENAT = 257 => sys_openat(args[..4]); SYS_MKDIRAT = 258 => sys_mkdirat(args[..3]); SYS_MKNODAT = 259 => sys_mknodat(args[..4]); @@ -380,6 +384,7 @@ impl_syscall_nums_and_dispatch_fn! { SYS_EPOLL_CREATE1 = 291 => sys_epoll_create1(args[..1]); SYS_DUP3 = 292 => sys_dup3(args[..3]); SYS_PIPE2 = 293 => sys_pipe2(args[..2]); + SYS_INOTIFY_INIT1 = 294 => sys_inotify_init1(args[..1]); SYS_PREADV = 295 => sys_preadv(args[..5]); SYS_PWRITEV = 296 => sys_pwritev(args[..5]); SYS_PRLIMIT64 = 302 => sys_prlimit64(args[..4]); diff --git a/kernel/src/syscall/inotify.rs b/kernel/src/syscall/inotify.rs new file mode 100644 index 000000000..01ff3161e --- /dev/null +++ b/kernel/src/syscall/inotify.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: MPL-2.0 + +use super::SyscallReturn; +use crate::{ + fs::{ + file_table::{get_file_fast, FdFlags, FileDesc}, + fs_resolver::FsPath, + notify::inotify::{InotifyControls, InotifyEvents, InotifyFile}, + utils::{InodeType, Permission}, + }, + prelude::*, + syscall::constants::MAX_FILENAME_LEN, +}; + +pub fn sys_inotify_init(ctx: &Context) -> Result { + do_inotify_init(0, ctx) // same path as `sys_inotify_init1`, with no flags set +} + +pub fn sys_inotify_init1(flags: u32, ctx: &Context) -> Result { + do_inotify_init(flags, ctx) +} + +fn do_inotify_init(flags: u32, ctx: &Context) -> Result { + debug!("inotify_init flags = {}", flags); + let flags = InotifyFileFlags::from_bits(flags) + .ok_or(Error::with_message(Errno::EINVAL, "invalid flags"))?; // bits outside `InotifyFileFlags` are rejected + let fd_flags = if flags.contains(InotifyFileFlags::CLOEXEC) { + FdFlags::CLOEXEC + } else { + FdFlags::empty() + }; + let is_nonblocking = flags.contains(InotifyFileFlags::NONBLOCK); + let file = 
InotifyFile::new(is_nonblocking)?; + let file_table = ctx.thread_local.borrow_file_table(); + let fd = file_table.unwrap().write().insert(file, fd_flags); + Ok(SyscallReturn::Return(fd as _)) +} + +pub fn sys_inotify_add_watch( + fd: FileDesc, + path: Vaddr, + flags: u32, + ctx: &Context, +) -> Result { + debug!("fd = {:?}, path = {:?}, flags = {}", fd, path, flags); + if flags == 0 { + return_errno_with_message!(Errno::EINVAL, "flags is 0, no events to watch"); + } + // Split flags into the event mask (`InotifyEvents`) and the control options (`InotifyControls`). + let (interesting, options) = parse_inotify_watch_request(flags)?; + + if options.contains(InotifyControls::MASK_ADD) && options.contains(InotifyControls::MASK_CREATE) + { // MASK_ADD and MASK_CREATE are mutually exclusive + return_errno_with_message!(Errno::EINVAL, "flags is invalid"); + } + + let path = ctx.user_space().read_cstring(path, MAX_FILENAME_LEN)?; + let mut file_table = ctx.thread_local.borrow_file_table_mut(); + let file = get_file_fast!(&mut file_table, fd); + + // Verify that the file is an inotify file. + let inotify_file = match file.downcast_ref::() { + Some(inotify_file) => inotify_file, + None => return_errno_with_message!(Errno::EINVAL, "file is not an inotify file"), + }; + + let dentry = { + let path = path.to_string_lossy(); + let fs_path = FsPath::try_from(path.as_ref())?; + + if options.contains(InotifyControls::DONT_FOLLOW) { + ctx.thread_local + .borrow_fs() + .resolver() + .read() + .lookup_no_follow(&fs_path)? + } else { + ctx.thread_local + .borrow_fs() + .resolver() + .read() + .lookup(&fs_path)? + } + }; + + // Verify caller has read permissions on the inode. 
+ let inode = dentry.inode(); + inode.check_permission(Permission::MAY_READ)?; + + if options.contains(InotifyControls::ONLYDIR) && inode.type_() != InodeType::Dir { + return_errno_with_message!(Errno::ENOTDIR, "path is not a directory"); + } + + let wd = inotify_file.add_watch(&dentry, interesting, options)?; + Ok(SyscallReturn::Return(wd as _)) +} + +pub fn sys_inotify_rm_watch(fd: FileDesc, wd: u32, ctx: &Context) -> Result { + debug!("inotify_rm_watch fd = {}, wd = {}", fd, wd); + + let mut file_table = ctx.thread_local.borrow_file_table_mut(); + let file = get_file_fast!(&mut file_table, fd); + let inotify_file = match file.downcast_ref::() { + Some(inotify_file) => inotify_file, + None => return_errno_with_message!(Errno::EINVAL, "file is not an inotify file"), + }; + inotify_file.remove_watch(wd)?; + Ok(SyscallReturn::Return(0)) +} + +fn parse_inotify_watch_request(flags: u32) -> Result<(InotifyEvents, InotifyControls)> { + let interesting = InotifyEvents::from_bits_truncate(flags); + let options = InotifyControls::from_bits_truncate(flags); + let recognized_bits = interesting.bits() | options.bits(); + + if flags & !recognized_bits != 0 { // some bit is neither an event nor a control option + return_errno_with_message!(Errno::EINVAL, "invalid flags"); + } + + Ok((interesting, options)) +} + +bitflags! { + struct InotifyFileFlags: u32 { + const NONBLOCK = 1 << 11; // Open the inotify file in non-blocking mode + const CLOEXEC = 1 << 19; // Set close-on-exec on the new file descriptor + } +} diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 6ae0482f7..b12d15f7a 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -73,6 +73,7 @@ mod gettid; mod gettimeofday; mod getuid; mod getxattr; +mod inotify; mod ioctl; mod kill; mod link;