Support sealing memfd files

This commit is contained in:
Wang Siyuan 2025-10-30 07:54:37 +00:00 committed by Ruihan Li
parent d1506171d2
commit 6ba1a84ae9
5 changed files with 192 additions and 10 deletions

View File

@ -9,6 +9,7 @@ use crate::{
},
prelude::*,
process::{process_table, Pid},
vm::memfd::{FileSeals, MemfdFile},
};
pub fn sys_fcntl(fd: FileDesc, cmd: i32, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
@ -29,6 +30,8 @@ pub fn sys_fcntl(fd: FileDesc, cmd: i32, arg: u64, ctx: &Context) -> Result<Sysc
}),
FcntlCmd::F_GETOWN => handle_getown(fd, ctx),
FcntlCmd::F_SETOWN => handle_setown(fd, arg, ctx),
FcntlCmd::F_ADD_SEALS => handle_addseal(fd, arg, ctx),
FcntlCmd::F_GET_SEALS => handle_getseal(fd, ctx),
}
}
@ -154,6 +157,39 @@ fn handle_setown(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn>
Ok(SyscallReturn::Return(0))
}
fn handle_addseal(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
let new_seals = FileSeals::from_bits(arg as u32)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid seals"))?;
let mut file_table = ctx.thread_local.borrow_file_table_mut();
let file = get_file_fast!(&mut file_table, fd);
let memfd_file = file.downcast_ref::<MemfdFile>().ok_or_else(|| {
Error::with_message(
Errno::EINVAL,
"file seals can only be applied to memfd files",
)
})?;
memfd_file.add_seals(new_seals)?;
Ok(SyscallReturn::Return(0))
}
fn handle_getseal(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
let mut file_table = ctx.thread_local.borrow_file_table_mut();
let file = get_file_fast!(&mut file_table, fd);
let memfd_file = file.downcast_ref::<MemfdFile>().ok_or_else(|| {
Error::with_message(
Errno::EINVAL,
"file seals can only be applied to memfd files",
)
})?;
let file_seals = memfd_file.get_seals();
Ok(SyscallReturn::Return(file_seals.bits() as _))
}
#[repr(i32)]
#[derive(Debug, Clone, Copy, TryFromInt)]
#[expect(non_camel_case_types)]
@ -169,6 +205,8 @@ enum FcntlCmd {
F_SETOWN = 8,
F_GETOWN = 9,
F_DUPFD_CLOEXEC = 1030,
F_ADD_SEALS = 1033,
F_GET_SEALS = 1034,
}
#[expect(non_camel_case_types)]

View File

@ -26,9 +26,9 @@ pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result<S
let file_table = ctx.thread_local.borrow_file_table();
let mut file_table_locked = file_table.unwrap().write();
// FIXME: Support `MFD_ALLOW_SEALING` and `MFD_HUGETLB`.
if memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING) {
warn!("sealing not supported");
// FIXME: Support `MFD_HUGETLB`.
if memfd_flags.contains(MemfdFlags::MFD_HUGETLB) {
warn!("`MFD_HUGETLB` not supported");
}
if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL | MemfdFlags::MFD_EXEC) {

View File

@ -29,7 +29,7 @@ use crate::{
signal::{PollHandle, Pollable},
Gid, Uid,
},
vm::vmo::Vmo,
vm::{perms::VmPerms, vmo::Vmo},
};
/// Maximum file name length for `memfd_create`, excluding the final `\0` byte.
@ -41,6 +41,62 @@ pub struct MemfdInode {
inode: RamInode,
#[expect(dead_code)]
name: String,
seals: Mutex<FileSeals>,
}
impl MemfdInode {
    /// Adds `new_seals` to the seals already set on this memfd inode.
    ///
    /// If `F_SEAL_EXEC` is requested while any executable bit is set in the
    /// inode mode, the shrink, grow, write and future-write seals are added
    /// as well.
    ///
    /// # Errors
    ///
    /// Returns `EPERM` if `F_SEAL_SEAL` is already set. When `F_SEAL_WRITE`
    /// is newly requested, an error from denying writable mappings is
    /// propagated and no seal is added.
    ///
    /// # Panics
    ///
    /// Panics if the inode cannot report its mode, has no page cache, or the
    /// page cache has no writable-mapping status.
    pub fn add_seals(&self, mut new_seals: FileSeals) -> Result<()> {
        let mut seals = self.seals.lock();

        if seals.contains(FileSeals::F_SEAL_SEAL) {
            return_errno_with_message!(Errno::EPERM, "the file is sealed against sealing");
        }

        // Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/mm/memfd.c#L262-L266>
        if new_seals.contains(FileSeals::F_SEAL_EXEC)
            && self
                .mode()
                .unwrap()
                .intersects(InodeMode::from_bits_truncate(0o111))
        {
            new_seals |= FileSeals::F_SEAL_SHRINK
                | FileSeals::F_SEAL_GROW
                | FileSeals::F_SEAL_WRITE
                | FileSeals::F_SEAL_FUTURE_WRITE;
        }

        // Deny writable mappings only when `F_SEAL_WRITE` is newly added, so
        // that repeated requests do not invoke `deny()` more than once. Linux's
        // `memfd_add_seals` guards `mapping_deny_writable` the same way.
        if new_seals.contains(FileSeals::F_SEAL_WRITE)
            && !seals.contains(FileSeals::F_SEAL_WRITE)
        {
            let page_cache = self.page_cache().unwrap();
            page_cache
                .writable_mapping_status()
                .as_ref()
                .unwrap()
                .deny()?;
        }

        *seals |= new_seals;

        Ok(())
    }

    /// Returns a copy of the seals currently set on this memfd inode.
    pub fn get_seals(&self) -> FileSeals {
        *self.seals.lock()
    }

    /// Checks whether writing to this memfd inode is allowed.
    ///
    /// This method restricts the `may_perms` if needed: when the inode is
    /// sealed with `F_SEAL_WRITE` or `F_SEAL_FUTURE_WRITE`, `MAY_WRITE` is
    /// removed from `may_perms`.
    ///
    /// # Errors
    ///
    /// Returns `EPERM` if the inode is write-sealed and `perms` contains
    /// `VmPerms::WRITE`.
    pub fn check_writable(&self, perms: VmPerms, may_perms: &mut VmPerms) -> Result<()> {
        let seals = self.seals.lock();

        if seals.intersects(FileSeals::F_SEAL_WRITE | FileSeals::F_SEAL_FUTURE_WRITE) {
            if perms.contains(VmPerms::WRITE) {
                return_errno_with_message!(Errno::EPERM, "the file is sealed against writing");
            }
            // Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/mm/memfd.c#L356>
            may_perms.remove(VmPerms::MAY_WRITE);
        }

        Ok(())
    }
}
#[inherit_methods(from = "self.inode")]
@ -85,18 +141,64 @@ impl Inode for MemfdInode {
fn remove_xattr(&self, name: XattrName) -> Result<()>;
/// Writes the contents of `reader` at `offset`, honoring the file seals.
///
/// An empty reader returns `Ok(0)` without checking any seal. With
/// `F_SEAL_GROW` set, the reader is limited so the write stops at the
/// current end of the file.
///
/// # Errors
///
/// Returns `EPERM` if the file is sealed against writing, or if it is sealed
/// against growing and `offset` is at or beyond the current file size.
fn write_at(&self, offset: usize, reader: &mut VmReader) -> Result<usize> {
    // Nothing to write.
    if !reader.has_remain() {
        return Ok(0);
    }

    // The seals lock is held for the whole write.
    let seals = self.seals.lock();

    let write_sealed = FileSeals::F_SEAL_WRITE | FileSeals::F_SEAL_FUTURE_WRITE;
    if seals.intersects(write_sealed) {
        return_errno_with_message!(Errno::EPERM, "the file is sealed against writing");
    }

    if seals.contains(FileSeals::F_SEAL_GROW) {
        let current_size = self.inode.size();
        if offset >= current_size {
            return_errno_with_message!(Errno::EPERM, "the file is sealed against growing");
        }
        // Clamp the write so it cannot extend the file.
        reader.limit(current_size - offset);
    }

    self.inode.write_at(offset, reader)
}
/// Resizes the file to `new_size`, honoring the shrink and grow seals.
///
/// # Errors
///
/// Returns `EPERM` if `new_size` is smaller than the current size while
/// `F_SEAL_SHRINK` is set, or larger than the current size while
/// `F_SEAL_GROW` is set.
fn resize(&self, new_size: usize) -> Result<()> {
    let seals = self.seals.lock();
    let current_size = self.inode.size();

    let shrinks = new_size < current_size;
    if shrinks && seals.contains(FileSeals::F_SEAL_SHRINK) {
        return_errno_with_message!(Errno::EPERM, "the file is sealed against shrinking");
    }

    let grows = new_size > current_size;
    if grows && seals.contains(FileSeals::F_SEAL_GROW) {
        return_errno_with_message!(Errno::EPERM, "the file is sealed against growing");
    }

    self.inode.resize(new_size)
}
/// Sets the inode mode, honoring `F_SEAL_EXEC`.
///
/// # Errors
///
/// Returns `EPERM` if `F_SEAL_EXEC` is set and `mode` differs from the current
/// mode in any executable bit (`0o111`).
fn set_mode(&self, mode: InodeMode) -> Result<()> {
    let seals = self.seals.lock();

    if seals.contains(FileSeals::F_SEAL_EXEC) {
        let exec_bits = InodeMode::from_bits_truncate(0o111);
        // The XOR leaves exactly the bits that would change.
        let changed_bits = self.mode().unwrap() ^ mode;
        if changed_bits.intersects(exec_bits) {
            return_errno_with_message!(
                Errno::EPERM,
                "the file is sealed against modifying executable bits"
            );
        }
    }

    self.inode.set_mode(mode)
}
/// Performs `fallocate` on the file, honoring the file seals.
///
/// Punching a hole never grows the file, so only the write seals apply to
/// `FallocMode::PunchHoleKeepSize`; every other mode is checked against
/// `F_SEAL_GROW`. This mirrors the order of checks in Linux's
/// `shmem_fallocate`.
///
/// # Errors
///
/// Returns `EPERM` if the file is sealed against writing and `mode` punches a
/// hole, or if the file is sealed against growing and `offset + len` exceeds
/// the current file size for any other mode.
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
    let seals = self.seals.lock();

    if mode == FallocMode::PunchHoleKeepSize {
        if seals.intersects(FileSeals::F_SEAL_WRITE | FileSeals::F_SEAL_FUTURE_WRITE) {
            return_errno_with_message!(Errno::EPERM, "the file is sealed against writing");
        }
    } else if seals.contains(FileSeals::F_SEAL_GROW)
        // Saturate so an overflowing range is treated as exceeding the file
        // size instead of wrapping around (or panicking) and slipping past
        // the grow seal.
        && offset.saturating_add(len) > self.inode.size()
    {
        return_errno_with_message!(Errno::EPERM, "the file is sealed against growing");
    }

    self.inode.fallocate(mode, offset, len)
}
@ -138,9 +240,18 @@ impl MemfdFile {
let ram_inode =
new_detached_inode_in_memfd(weak_self, mode, Uid::new_root(), Gid::new_root());
let mut seals = FileSeals::empty();
if !allow_sealing {
seals |= FileSeals::F_SEAL_SEAL;
}
if !executable {
seals |= FileSeals::F_SEAL_EXEC;
}
MemfdInode {
inode: ram_inode,
name,
seals: Mutex::new(seals),
}
});
@ -152,6 +263,17 @@ impl MemfdFile {
})
}
/// Adds `new_seals` to the underlying memfd inode.
///
/// # Errors
///
/// Returns `EPERM` if this file was not opened writable. Errors from
/// [`MemfdInode::add_seals`] are propagated unchanged.
pub fn add_seals(&self, new_seals: FileSeals) -> Result<()> {
    let opened_writable = self.access_mode.is_writable();
    if !opened_writable {
        return_errno_with_message!(Errno::EPERM, "the file is not opened writable");
    }

    let inode = self.memfd_inode();
    inode.add_seals(new_seals)
}
/// Returns the seals currently set on the underlying memfd inode.
pub fn get_seals(&self) -> FileSeals {
    let inode = self.memfd_inode();
    inode.get_seals()
}
/// Returns the inode backing this file as a [`MemfdInode`].
///
/// # Panics
///
/// Panics if the stored inode is not a `MemfdInode`.
fn memfd_inode(&self) -> &MemfdInode {
    let downcast = self.memfd_inode.downcast_ref::<MemfdInode>();
    downcast.unwrap()
}
@ -252,3 +374,20 @@ bitflags! {
const MFD_EXEC = 1 << 4;
}
}
bitflags! {
    /// Seal bits that can be set on a memfd file via `F_ADD_SEALS` and
    /// queried via `F_GET_SEALS`.
    pub struct FileSeals: u32 {
        /// Prevents any further seals from being added.
        const F_SEAL_SEAL = 1 << 0;
        /// Prevents the file from shrinking.
        const F_SEAL_SHRINK = 1 << 1;
        /// Prevents the file from growing.
        const F_SEAL_GROW = 1 << 2;
        /// Prevents writes to the file.
        const F_SEAL_WRITE = 1 << 3;
        /// Prevents future writes while the file is mapped.
        const F_SEAL_FUTURE_WRITE = 1 << 4;
        /// Prevents `chmod` from modifying the executable bits.
        const F_SEAL_EXEC = 1 << 5;
    }
}

View File

@ -1128,7 +1128,7 @@ impl<'a> VmarMapOptions<'a> {
vmo,
mappable,
perms,
may_perms,
mut may_perms,
vmo_offset,
size: map_size,
offset,
@ -1183,9 +1183,16 @@ impl<'a> VmarMapOptions<'a> {
// Handle the memory backed by device or page cache.
match mappable {
Mappable::Inode(inode) => {
let is_writable_tracked = inode.downcast_ref::<MemfdInode>().is_some()
let is_writable_tracked = if let Some(memfd_inode) =
inode.downcast_ref::<MemfdInode>()
&& is_shared
&& may_perms.contains(VmPerms::MAY_WRITE);
&& may_perms.contains(VmPerms::MAY_WRITE)
{
memfd_inode.check_writable(perms, &mut may_perms)?;
true
} else {
false
};
// Since `Mappable::Inode` is provided, it is
// reasonable to assume that the VMO is provided.

View File

@ -1,6 +1,4 @@
MemfdTest.Name
MemfdTest.Seal*
MemfdTest.NoPartialSealApplicationWhenWriteSealRejected
MemfdTest.TmpfsFilesHaveSealSeal
MemfdTest.CanOpenFromProcfs
MemfdTest.SealGrowPartialWriteTruncatedSamePage
MemfdTest.MemfdMustBeWritableToModifySeals