Apply pseudo `Path` to memfd files and remove `MemfdFile`

This commit is contained in:
Wang Siyuan 2025-12-25 09:22:12 +00:00
parent 22798339b2
commit a21037cdfe
8 changed files with 154 additions and 380 deletions

View File

@ -12,7 +12,6 @@ use super::{
use crate::{
fs::{
path::MountNamespace,
ramfs::memfd::MemfdInode,
utils::{Inode, SymbolicLink},
},
prelude::*,
@ -407,13 +406,9 @@ impl PathOrInode {
pub fn display_name(&self) -> String {
match self {
PathOrInode::Path(path) => path.abs_path(),
PathOrInode::Inode(inode) => {
PathOrInode::Inode(_) => {
// FIXME: Add pseudo dentries to store the correct name.
if let Some(memfd_inode) = inode.downcast_ref::<MemfdInode>() {
memfd_inode.name().to_string()
} else {
String::from("[pseudo inode]")
}
String::from("[pseudo inode]")
}
}
}

View File

@ -84,6 +84,10 @@ impl InodeHandle {
*offset
}
/// Returns the access rights stored in this handle's `rights` field.
///
/// Visible to the parent module only, so that sibling code can inspect
/// the rights without having access to the field itself.
pub(super) fn rights(&self) -> Rights {
self.rights
}
fn inode_io_and_is_offset_aware(&self) -> (&dyn InodeIo, bool) {
if let Some(ref file_io) = self.file_io {
let is_offset_aware = file_io.is_offset_aware();
@ -347,7 +351,11 @@ impl FileLike for InodeHandle {
return_errno_with_message!(Errno::EINVAL, "the file is not opened writable");
}
do_resize_util(self.path.inode().as_ref(), self.status_flags(), new_size)
if self.status_flags().contains(StatusFlags::O_APPEND) {
// FIXME: It's allowed to `ftruncate` an append-only file on Linux.
return_errno_with_message!(Errno::EPERM, "can not resize append-only file");
}
self.path.inode().resize(new_size)
}
fn status_flags(&self) -> StatusFlags {
@ -394,13 +402,41 @@ impl FileLike for InodeHandle {
return_errno_with_message!(Errno::EBADF, "the file is not opened writable");
}
do_fallocate_util(
self.path.inode().as_ref(),
self.status_flags(),
mode,
offset,
len,
)
let inode = self.path.inode().as_ref();
let inode_type = inode.type_();
// TODO: `fallocate` on pipe files also fails with `ESPIPE`.
if inode_type == InodeType::NamedPipe {
return_errno_with_message!(Errno::ESPIPE, "the inode is a FIFO file");
}
if !(inode_type == InodeType::File || inode_type == InodeType::Dir) {
return_errno_with_message!(
Errno::ENODEV,
"the inode is not a regular file or a directory"
);
}
let status_flags = self.status_flags();
if status_flags.contains(StatusFlags::O_APPEND)
&& (mode == FallocMode::PunchHoleKeepSize
|| mode == FallocMode::CollapseRange
|| mode == FallocMode::InsertRange)
{
return_errno_with_message!(
Errno::EPERM,
"the flags do not work on the append-only file"
);
}
if status_flags.contains(StatusFlags::O_DIRECT)
|| status_flags.contains(StatusFlags::O_PATH)
{
return_errno_with_message!(
Errno::EBADF,
"currently fallocate file with O_DIRECT or O_PATH is not supported"
);
}
inode.fallocate(mode, offset, len)
}
fn inode(&self) -> &Arc<dyn Inode> {
@ -477,11 +513,7 @@ pub trait FileIo: Pollable + InodeIo + Send + Sync + 'static {
}
}
pub(super) fn do_seek_util(
offset: &Mutex<usize>,
pos: SeekFrom,
end: Option<usize>,
) -> Result<usize> {
fn do_seek_util(offset: &Mutex<usize>, pos: SeekFrom, end: Option<usize>) -> Result<usize> {
let mut offset = offset.lock();
let new_offset = match pos {
@ -508,54 +540,3 @@ pub(super) fn do_seek_util(
*offset = new_offset;
Ok(new_offset)
}
/// Checks whether a `fallocate` request is acceptable for `inode` under the
/// given `status_flags` and, if so, performs it through `Inode::fallocate`.
///
/// # Errors
///
/// - `ESPIPE` if the inode is a named pipe.
/// - `ENODEV` if the inode is neither a regular file nor a directory.
/// - `EPERM` if `O_APPEND` is set and `mode` is punch-hole, collapse-range
///   or insert-range.
/// - `EBADF` if `O_DIRECT` or `O_PATH` is set.
/// - Any error returned by `Inode::fallocate` itself.
pub(super) fn do_fallocate_util(
    inode: &dyn Inode,
    status_flags: StatusFlags,
    mode: FallocMode,
    offset: usize,
    len: usize,
) -> Result<()> {
    let kind = inode.type_();

    // TODO: `fallocate` on pipe files also fails with `ESPIPE`.
    if kind == InodeType::NamedPipe {
        return_errno_with_message!(Errno::ESPIPE, "the inode is a FIFO file");
    }

    let is_file_or_dir = kind == InodeType::File || kind == InodeType::Dir;
    if !is_file_or_dir {
        return_errno_with_message!(
            Errno::ENODEV,
            "the inode is not a regular file or a directory"
        );
    }

    // These modes modify or shift existing content, which an append-only
    // file must not allow.
    let touches_existing_data = mode == FallocMode::PunchHoleKeepSize
        || mode == FallocMode::CollapseRange
        || mode == FallocMode::InsertRange;
    if status_flags.contains(StatusFlags::O_APPEND) && touches_existing_data {
        return_errno_with_message!(
            Errno::EPERM,
            "the flags do not work on the append-only file"
        );
    }

    let is_direct = status_flags.contains(StatusFlags::O_DIRECT);
    let is_path_only = status_flags.contains(StatusFlags::O_PATH);
    if is_direct || is_path_only {
        return_errno_with_message!(
            Errno::EBADF,
            "currently fallocate file with O_DIRECT or O_PATH is not supported"
        );
    }

    inode.fallocate(mode, offset, len)
}
/// Resizes `inode` to `new_size` unless the file was opened append-only.
///
/// # Errors
///
/// - `EPERM` if `status_flags` contains `O_APPEND`.
/// - Any error returned by `Inode::resize`.
pub(super) fn do_resize_util(
    inode: &dyn Inode,
    status_flags: StatusFlags,
    new_size: usize,
) -> Result<()> {
    let append_only = status_flags.contains(StatusFlags::O_APPEND);
    if append_only {
        // FIXME: It's allowed to `ftruncate` an append-only file on Linux.
        return_errno_with_message!(Errno::EPERM, "can not resize append-only file");
    }

    inode.resize(new_size)
}

View File

@ -113,17 +113,38 @@ impl Path {
///
/// Returns an `InodeHandle` on success.
pub fn open(&self, open_args: OpenArgs) -> Result<InodeHandle> {
let inode = self.inode();
check_open_util(inode.as_ref(), &open_args)?;
let inode = self.inode().as_ref();
let inode_type = inode.type_();
let creation_flags = &open_args.creation_flags;
let status_flags = &open_args.status_flags;
if inode.type_().is_regular_file()
&& open_args.creation_flags.contains(CreationFlags::O_TRUNC)
&& !open_args.status_flags.contains(StatusFlags::O_PATH)
if inode_type == InodeType::SymLink
&& creation_flags.contains(CreationFlags::O_NOFOLLOW)
&& !status_flags.contains(StatusFlags::O_PATH)
{
return_errno_with_message!(Errno::ELOOP, "the file is a symlink");
}
if creation_flags.contains(CreationFlags::O_CREAT)
&& creation_flags.contains(CreationFlags::O_EXCL)
{
return_errno_with_message!(Errno::EEXIST, "the file already exists");
}
if creation_flags.contains(CreationFlags::O_DIRECTORY) && inode_type != InodeType::Dir {
return_errno_with_message!(
Errno::ENOTDIR,
"O_DIRECTORY is specified but the file is not a directory"
);
}
if inode_type.is_regular_file()
&& creation_flags.contains(CreationFlags::O_TRUNC)
&& !status_flags.contains(StatusFlags::O_PATH)
{
self.resize(0)?;
}
InodeHandle::new(self.clone(), open_args.access_mode, open_args.status_flags)
InodeHandle::new(self.clone(), open_args.access_mode, *status_flags)
}
/// Gets the absolute path.
@ -242,33 +263,6 @@ impl Path {
}
}
/// Validates that `inode` may be opened with `open_args`.
///
/// Only the inode type and the open flags are inspected; nothing is modified.
///
/// # Errors
///
/// - `ELOOP` if the inode is a symlink, `O_NOFOLLOW` is set and `O_PATH` is not.
/// - `EEXIST` if both `O_CREAT` and `O_EXCL` are set (the inode already exists).
/// - `ENOTDIR` if `O_DIRECTORY` is set but the inode is not a directory.
pub(super) fn check_open_util(inode: &dyn Inode, open_args: &OpenArgs) -> Result<()> {
    let flags = &open_args.creation_flags;
    let kind = inode.type_();

    let is_symlink = kind == InodeType::SymLink;
    let opened_as_path = open_args.status_flags.contains(StatusFlags::O_PATH);
    if is_symlink && flags.contains(CreationFlags::O_NOFOLLOW) && !opened_as_path {
        return_errno_with_message!(Errno::ELOOP, "the file is a symlink");
    }

    // Exclusive creation can never succeed on an inode that is already there.
    if flags.contains(CreationFlags::O_CREAT | CreationFlags::O_EXCL) {
        return_errno_with_message!(Errno::EEXIST, "the file already exists");
    }

    let is_dir = kind == InodeType::Dir;
    if !is_dir && flags.contains(CreationFlags::O_DIRECTORY) {
        return_errno_with_message!(
            Errno::ENOTDIR,
            "O_DIRECTORY is specified but the file is not a directory"
        );
    }

    Ok(())
}
impl Path {
/// Mounts a filesystem at the current path.
///

View File

@ -3,11 +3,7 @@
//! Memfd Implementation.
use alloc::format;
use core::{
fmt::Display,
sync::atomic::{AtomicU32, Ordering},
time::Duration,
};
use core::time::Duration;
use align_ext::AlignExt;
use aster_block::bio::BioWaiter;
@ -17,25 +13,18 @@ use spin::Once;
use super::fs::RamInode;
use crate::{
events::IoEvents,
fs::{
file_handle::{FileLike, Mappable},
file_table::FdFlags,
inode_handle::{do_fallocate_util, do_resize_util, do_seek_util},
path::{Mount, RESERVED_MOUNT_ID, check_open_util},
inode_handle::InodeHandle,
path::{Mount, Path},
tmpfs::TmpFs,
utils::{
AccessMode, CachePage, CreationFlags, Extension, FallocMode, FileSystem, Inode,
InodeIo, InodeMode, InodeType, Metadata, OpenArgs, PageCacheBackend, SeekFrom,
StatusFlags, XattrName, XattrNamespace, XattrSetFlags, mkmod,
AccessMode, CachePage, Extension, FallocMode, FileSystem, Inode, InodeIo, InodeMode,
InodeType, Metadata, PageCacheBackend, StatusFlags, XattrName, XattrNamespace,
XattrSetFlags, mkmod,
},
},
prelude::*,
process::{
Gid, Uid,
signal::{PollHandle, Pollable},
},
util::ioctl::RawIoctl,
process::{Gid, Uid},
vm::{perms::VmPerms, vmo::Vmo},
};
@ -97,7 +86,7 @@ impl MemfdInode {
Ok(())
}
pub fn name(&self) -> &str {
pub(self) fn name(&self) -> &str {
&self.name
}
}
@ -235,53 +224,31 @@ impl Inode for MemfdInode {
}
}
struct MemfdTmpFs {
_private: (),
/// Memfd-specific operations layered on top of a file handle type.
///
/// The trait is implemented for `InodeHandle` in this file, which lets
/// callers create memfd files and manage their seals through an ordinary
/// inode handle.
pub trait MemfdInodeHandle: Sized {
/// Creates a new memfd file called `name`, configured by `memfd_flags`,
/// and returns a handle to it.
fn new_memfd(name: String, memfd_flags: MemfdFlags) -> Result<Self>;
/// Adds `new_seals` to the memfd file behind this handle.
fn add_seals(&self, new_seals: FileSeals) -> Result<()>;
/// Returns the seals currently set on the memfd file behind this handle.
fn get_seals(&self) -> Result<FileSeals>;
}
// Accessors for the global objects that back every memfd file.
impl MemfdTmpFs {
// Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/mm/shmem.c#L3828-L3850>
// Returns the single `TmpFs` instance used for memfd files; `Once` ensures
// `TmpFs::new` runs only on the first call.
fn singleton() -> &'static Arc<TmpFs> {
static MEMFD_TMPFS: Once<Arc<TmpFs>> = Once::new();
MEMFD_TMPFS.call_once(TmpFs::new)
}
// Returns the single pseudo `Mount` built over `singleton()`, created on
// the first call.
fn mount_node() -> &'static Arc<Mount> {
static MEMFD_TMPFS_MOUNT: Once<Arc<Mount>> = Once::new();
MEMFD_TMPFS_MOUNT.call_once(|| Mount::new_pseudo(Self::singleton().clone()))
}
}
/// An opened memfd file: an inode plus per-open state.
pub struct MemfdFile {
/// The inode backing this file.
memfd_inode: Arc<dyn Inode>,
/// The current read/write position, guarded by a mutex.
offset: Mutex<usize>,
/// The raw bits of the file's `StatusFlags`, stored atomically.
status_flags: AtomicU32,
/// The access rights granted when the file was opened.
rights: Rights,
}
impl MemfdFile {
pub fn new(name: &str, memfd_flags: MemfdFlags) -> Result<Self> {
impl MemfdInodeHandle for InodeHandle {
fn new_memfd(name: String, memfd_flags: MemfdFlags) -> Result<Self> {
if name.len() > MAX_MEMFD_NAME_LEN {
return_errno_with_message!(Errno::EINVAL, "MemfdManager: `name` is too long.");
return_errno_with_message!(Errno::EINVAL, "the memfd name is too long");
}
let name = format!("/memfd:{}", name);
let (allow_sealing, executable) = if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL) {
(true, false)
} else {
(memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING), true)
};
let mode = if executable {
mkmod!(a+rwx)
} else {
mkmod!(a+rw)
};
let memfd_inode = Arc::new_cyclic(|weak_self| {
let (allow_sealing, executable) = if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL) {
(true, false)
} else {
(memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING), true)
};
let mode = if executable {
mkmod!(a+rwx)
} else {
mkmod!(a+rw)
};
let ram_inode = RamInode::new_file_detached_in_memfd(
weak_self,
mode,
@ -304,216 +271,68 @@ impl MemfdFile {
}
});
Ok(Self {
memfd_inode,
offset: Mutex::new(0),
status_flags: AtomicU32::new(0),
rights: Rights::READ | Rights::WRITE,
})
let path = MemfdTmpFs::new_path(memfd_inode);
InodeHandle::new_unchecked_access(path, AccessMode::O_RDWR, StatusFlags::empty())
}
pub fn open(inode: Arc<MemfdInode>, open_args: OpenArgs) -> Result<Self> {
let inode: Arc<dyn Inode> = inode;
let status_flags = open_args.status_flags;
let access_mode = open_args.access_mode;
if !status_flags.contains(StatusFlags::O_PATH) {
inode.check_permission(access_mode.into())?;
}
check_open_util(inode.as_ref(), &open_args)?;
if open_args.creation_flags.contains(CreationFlags::O_TRUNC)
&& !status_flags.contains(StatusFlags::O_PATH)
{
inode.resize(0)?;
}
let rights = if status_flags.contains(StatusFlags::O_PATH) {
Rights::empty()
} else {
access_mode.into()
};
Ok(Self {
memfd_inode: inode,
offset: Mutex::new(0),
status_flags: AtomicU32::new(open_args.status_flags.bits()),
rights,
})
}
pub fn add_seals(&self, new_seals: FileSeals) -> Result<()> {
if self.rights.is_empty() {
fn add_seals(&self, new_seals: FileSeals) -> Result<()> {
let rights = self.rights();
if rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
if !self.rights.contains(Rights::WRITE) {
if !rights.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EPERM, "the file is not opened writable");
}
self.memfd_inode().add_seals(new_seals)
memfd_inode_or_err(self)?.add_seals(new_seals)
}
pub fn get_seals(&self) -> Result<FileSeals> {
if self.rights.is_empty() {
fn get_seals(&self) -> Result<FileSeals> {
let rights = self.rights();
if rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
Ok(self.memfd_inode().get_seals())
}
fn memfd_inode(&self) -> &MemfdInode {
self.memfd_inode.downcast_ref::<MemfdInode>().unwrap()
Ok(memfd_inode_or_err(self)?.get_seals())
}
}
impl Pollable for MemfdFile {
    /// Reports the file as both readable and writable, restricted to the
    /// events requested in `mask`. The poller is never registered.
    fn poll(&self, mask: IoEvents, _poller: Option<&mut PollHandle>) -> IoEvents {
        (IoEvents::IN | IoEvents::OUT) & mask
    }
}
impl FileLike for MemfdFile {
fn read(&self, writer: &mut VmWriter) -> Result<usize> {
let mut offset = self.offset.lock();
let len = self.read_at(*offset, writer)?;
*offset += len;
Ok(len)
}
fn read_at(&self, offset: usize, writer: &mut VmWriter) -> Result<usize> {
if !self.rights.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "the file is not opened readable");
}
self.memfd_inode
.read_at(offset, writer, self.status_flags())
}
fn write(&self, reader: &mut VmReader) -> Result<usize> {
let mut offset = self.offset.lock();
if self.status_flags().contains(StatusFlags::O_APPEND) {
// FIXME: `O_APPEND` should ensure that new content is appended even if another process
// is writing to the file concurrently.
*offset = self.memfd_inode.size();
}
let len = self.write_at(*offset, reader)?;
*offset += len;
Ok(len)
}
fn write_at(&self, mut offset: usize, reader: &mut VmReader) -> Result<usize> {
if !self.rights.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EBADF, "the file is not opened writable");
}
let status_flags = self.status_flags();
if status_flags.contains(StatusFlags::O_APPEND) {
// If the file has the `O_APPEND` flag, the offset is ignored.
// FIXME: `O_APPEND` should ensure that new content is appended even if another process
// is writing to the file concurrently.
offset = self.memfd_inode.size();
}
self.memfd_inode.write_at(offset, reader, status_flags)
}
fn resize(&self, new_size: usize) -> Result<()> {
if self.rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
if !self.rights.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EINVAL, "the file is not opened writable");
}
do_resize_util(self.memfd_inode.as_ref(), self.status_flags(), new_size)
}
fn status_flags(&self) -> StatusFlags {
let bits = self.status_flags.load(Ordering::Relaxed);
StatusFlags::from_bits(bits).unwrap()
}
fn set_status_flags(&self, new_status_flags: StatusFlags) -> Result<()> {
self.status_flags
.store(new_status_flags.bits(), Ordering::Relaxed);
Ok(())
}
fn access_mode(&self) -> AccessMode {
self.rights.into()
}
fn seek(&self, pos: SeekFrom) -> Result<usize> {
if self.rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
do_seek_util(&self.offset, pos, Some(self.memfd_inode.size()))
}
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
if !self.rights.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EBADF, "the file is not opened writable");
}
do_fallocate_util(
self.memfd_inode.as_ref(),
self.status_flags(),
mode,
offset,
len,
)
}
fn mappable(&self) -> Result<Mappable> {
if self.rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
Ok(Mappable::Inode(self.memfd_inode.clone()))
}
fn ioctl(&self, _raw_ioctl: RawIoctl) -> Result<i32> {
if self.rights.is_empty() {
return_errno_with_message!(Errno::EBADF, "the file is opened as a path");
}
return_errno_with_message!(Errno::ENOTTY, "ioctl is not supported");
}
fn inode(&self) -> &Arc<dyn Inode> {
&self.memfd_inode
}
fn dump_proc_fdinfo(self: Arc<Self>, fd_flags: FdFlags) -> Box<dyn Display> {
struct FdInfo {
inner: Arc<MemfdFile>,
fd_flags: FdFlags,
}
impl Display for FdInfo {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mut flags = self.inner.status_flags().bits() | self.inner.access_mode() as u32;
if self.fd_flags.contains(FdFlags::CLOEXEC) {
flags |= CreationFlags::O_CLOEXEC.bits();
}
writeln!(f, "pos:\t{}", *self.inner.offset.lock())?;
writeln!(f, "flags:\t0{:o}", flags)?;
writeln!(f, "mnt_id:\t{}", RESERVED_MOUNT_ID)?;
writeln!(f, "ino:\t{}", self.inner.inode().ino())
}
}
Box::new(FdInfo {
inner: self,
fd_flags,
fn memfd_inode_or_err(file: &InodeHandle) -> Result<&MemfdInode> {
file.path()
.inode()
.downcast_ref::<MemfdInode>()
.ok_or_else(|| {
Error::with_message(
Errno::EINVAL,
"file seals can only be applied to memfd files",
)
})
}
/// Namespace type for the global objects that back memfd files.
///
/// The private unit field keeps the type from being constructed elsewhere;
/// the code shown here only uses its associated functions.
struct MemfdTmpFs {
_private: (),
}
impl MemfdTmpFs {
// Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/mm/shmem.c#L3828-L3850>
/// Returns the single `TmpFs` instance used for memfd files; `Once`
/// ensures `TmpFs::new` runs only on the first call.
pub(self) fn singleton() -> &'static Arc<TmpFs> {
static MEMFD_TMPFS: Once<Arc<TmpFs>> = Once::new();
MEMFD_TMPFS.call_once(TmpFs::new)
}
/// Builds a pseudo `Path` for `memfd_inode` on the memfd mount.
///
/// The closure produces the displayed name, `/memfd:<name>`.
pub(self) fn new_path(memfd_inode: Arc<MemfdInode>) -> Path {
Path::new_pseudo(Self::mount_node().clone(), memfd_inode, |inode| {
// NOTE(review): the `unwrap` presumes the callback is only ever handed
// the `MemfdInode` passed above — confirm against `Path::new_pseudo`.
let memfd_inode = inode.downcast_ref::<MemfdInode>().unwrap();
format!("/memfd:{}", memfd_inode.name())
})
}
/// Returns the single pseudo `Mount` built over `singleton()`, created
/// on the first call.
fn mount_node() -> &'static Arc<Mount> {
static MEMFD_TMPFS_MOUNT: Once<Arc<Mount>> = Once::new();
MEMFD_TMPFS_MOUNT.call_once(|| Mount::new_pseudo(Self::singleton().clone()))
}
}

View File

@ -7,7 +7,7 @@ use crate::{
fs::{
file_handle::FileLike,
file_table::{FdFlags, FileDesc, WithFileTable, get_file_fast},
ramfs::memfd::{FileSeals, MemfdFile},
ramfs::memfd::{FileSeals, MemfdInodeHandle},
utils::{FileRange, OFFSET_MAX, RangeLockItem, RangeLockType, StatusFlags},
},
prelude::*,
@ -165,14 +165,8 @@ fn handle_addseal(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn
let mut file_table = ctx.thread_local.borrow_file_table_mut();
let file = get_file_fast!(&mut file_table, fd);
let memfd_file = file.downcast_ref::<MemfdFile>().ok_or_else(|| {
Error::with_message(
Errno::EINVAL,
"file seals can only be applied to memfd files",
)
})?;
memfd_file.add_seals(new_seals)?;
file.as_inode_handle_or_err()?.add_seals(new_seals)?;
Ok(SyscallReturn::Return(0))
}
@ -180,14 +174,8 @@ fn handle_addseal(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn
fn handle_getseal(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
let mut file_table = ctx.thread_local.borrow_file_table_mut();
let file = get_file_fast!(&mut file_table, fd);
let memfd_file = file.downcast_ref::<MemfdFile>().ok_or_else(|| {
Error::with_message(
Errno::EINVAL,
"file seals can only be applied to memfd files",
)
})?;
let file_seals = memfd_file.get_seals()?;
let file_seals = file.as_inode_handle_or_err()?.get_seals()?;
Ok(SyscallReturn::Return(file_seals.bits() as _))
}

View File

@ -4,7 +4,8 @@ use super::SyscallReturn;
use crate::{
fs::{
file_table::FdFlags,
ramfs::memfd::{MAX_MEMFD_NAME_LEN, MemfdFile, MemfdFlags},
inode_handle::InodeHandle,
ramfs::memfd::{MAX_MEMFD_NAME_LEN, MemfdFlags, MemfdInodeHandle},
},
prelude::*,
};
@ -40,7 +41,7 @@ pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result<S
);
}
let memfd_file = MemfdFile::new(name.to_string_lossy().as_ref(), memfd_flags)?;
let memfd_file = InodeHandle::new_memfd(name.to_string_lossy().into_owned(), memfd_flags)?;
file_table_locked.insert(Arc::new(memfd_file), fd_flags)
};

View File

@ -9,7 +9,6 @@ use crate::{
fs_resolver::{AT_FDCWD, FsPath, FsResolver, LookupResult, PathOrInode},
inode_handle::InodeHandle,
pipe::{AnonPipeFile, AnonPipeInode},
ramfs::memfd::{MemfdFile, MemfdInode},
utils::{AccessMode, CreationFlags, InodeMode, InodeType, OpenArgs, StatusFlags},
},
prelude::*,
@ -93,9 +92,7 @@ fn do_open(
LookupResult::Resolved(target) => match target {
PathOrInode::Path(path) => Arc::new(path.open(open_args)?),
PathOrInode::Inode(inode) => {
if let Ok(memfd_inode) = Arc::downcast::<MemfdInode>(inode.clone()) {
Arc::new(MemfdFile::open(memfd_inode, open_args)?)
} else if let Ok(pipe_inode) = Arc::downcast::<AnonPipeInode>(inode) {
if let Ok(pipe_inode) = Arc::downcast::<AnonPipeInode>(inode) {
Arc::new(AnonPipeFile::open(
pipe_inode,
open_args.access_mode,

View File

@ -1 +0,0 @@
MemfdTest.Name