From 4fa2b55e47884bb53b2048cb2fa55c10ba7b1716 Mon Sep 17 00:00:00 2001 From: wyt8 <2253457010@qq.com> Date: Mon, 12 Jan 2026 03:21:40 +0000 Subject: [PATCH] Place the heap after bss segment --- kernel/src/process/execve.rs | 4 +- kernel/src/process/mod.rs | 2 +- kernel/src/process/process/init_proc.rs | 8 +- kernel/src/process/process_vm/heap.rs | 182 ++++++++++-------- kernel/src/process/process_vm/mod.rs | 32 ++- .../process/program_loader/elf/elf_file.rs | 8 + .../process/program_loader/elf/load_elf.rs | 66 +++++-- kernel/src/process/rlimit.rs | 5 +- kernel/src/syscall/brk.rs | 11 +- kernel/src/vm/vmar/vmar_impls/mod.rs | 76 +++++--- 10 files changed, 234 insertions(+), 160 deletions(-) diff --git a/kernel/src/process/execve.rs b/kernel/src/process/execve.rs index 56865c68c..65f882c62 100644 --- a/kernel/src/process/execve.rs +++ b/kernel/src/process/execve.rs @@ -15,7 +15,7 @@ use crate::{ process::{ ContextUnshareAdminApi, Credentials, Process, posix_thread::{PosixThread, ThreadLocal, ThreadName, sigkill_other_threads, thread_table}, - process_vm::{MAX_LEN_STRING_ARG, MAX_NR_STRING_ARGS, new_vmar_and_map}, + process_vm::{MAX_LEN_STRING_ARG, MAX_NR_STRING_ARGS, ProcessVm}, program_loader::{ProgramToLoad, elf::ElfLoadInfo}, signal::{ HandlePendingSignal, PauseReason, SigStack, @@ -54,7 +54,7 @@ pub fn do_execve( let program_to_load = ProgramToLoad::build_from_inode(elf_inode.clone(), &fs_resolver, argv, envp)?; - let new_vmar = new_vmar_and_map(elf_file.clone()); + let new_vmar = Vmar::new(ProcessVm::new(elf_file.clone())); let elf_load_info = program_to_load.load_to_vmar(new_vmar.as_ref(), &fs_resolver)?; // Ensure no other thread is concurrently performing exit_group or execve. diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index d4a144eec..ad06e5c29 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -38,7 +38,7 @@ pub use process::{ Terminal, broadcast_signal_async, enqueue_signal_async, spawn_init_process, }; pub use process_filter::ProcessFilter; -pub use process_vm::ProcessVm; +pub use process_vm::{INIT_STACK_SIZE, ProcessVm}; pub use rlimit::ResourceType; pub use stats::collect_process_creation_count; pub use term_status::TermStatus; diff --git a/kernel/src/process/process/init_proc.rs b/kernel/src/process/process/init_proc.rs index 5c80f06f6..f156e221c 100644 --- a/kernel/src/process/process/init_proc.rs +++ b/kernel/src/process/process/init_proc.rs @@ -13,16 +13,16 @@ use crate::{ }, prelude::*, process::{ - Credentials, UserNamespace, + Credentials, ProcessVm, UserNamespace, posix_thread::{PosixThreadBuilder, ThreadName, allocate_posix_tid}, process_table, - process_vm::new_vmar_and_map, program_loader::ProgramToLoad, rlimit::new_resource_limits_for_init, signal::sig_disposition::SigDispositions, }, sched::Nice, thread::Tid, + vm::vmar::Vmar, }; /// Creates and schedules the init process to run. 
@@ -56,7 +56,7 @@ fn create_init_process( let elf_path = fs.resolver().read().lookup(&fs_path)?; let pid = allocate_posix_tid(); - let process_vm = new_vmar_and_map(elf_path.clone()); + let vmar = Vmar::new(ProcessVm::new(elf_path.clone())); let resource_limits = new_resource_limits_for_init(); let nice = Nice::default(); let oom_score_adj = 0; @@ -65,7 +65,7 @@ fn create_init_process( let init_proc = Process::new( pid, - process_vm, + vmar, resource_limits, nice, oom_score_adj, diff --git a/kernel/src/process/process_vm/heap.rs b/kernel/src/process/process_vm/heap.rs index 63a39d610..9d37c2bb4 100644 --- a/kernel/src/process/process_vm/heap.rs +++ b/kernel/src/process/process_vm/heap.rs @@ -1,139 +1,157 @@ // SPDX-License-Identifier: MPL-2.0 -use core::sync::atomic::{AtomicUsize, Ordering}; +use core::ops::Range; use align_ext::AlignExt; use crate::{ prelude::*, + process::ResourceType, + util::random::getrandom, vm::{perms::VmPerms, vmar::Vmar}, }; -/// The base address of user heap -pub const USER_HEAP_BASE: Vaddr = 0x0000_0000_1000_0000; -/// The max allowed size of user heap -pub const USER_HEAP_SIZE_LIMIT: usize = 16 * 1024 * PAGE_SIZE; // 16 * 4MB - #[derive(Debug)] pub struct Heap { - /// The lowest address of the heap - base: Vaddr, - /// The heap size limit - limit: usize, - /// The current heap highest address - current_program_break: AtomicUsize, + inner: Mutex>, +} + +#[derive(Clone, Debug)] +struct HeapInner { + /// The size of the data segment, used for rlimit checking. + data_segment_size: usize, + /// The heap range. + // NOTE: `heap_range.end` is decided by user input and may not be page-aligned. + heap_range: Range, } impl Heap { - pub const fn new() -> Self { - Heap { - base: USER_HEAP_BASE, - limit: USER_HEAP_SIZE_LIMIT, - current_program_break: AtomicUsize::new(USER_HEAP_BASE), + pub(super) const fn new_uninitialized() -> Self { + Self { + inner: Mutex::new(None), } } /// Initializes and maps the heap virtual memory. - pub(super) fn alloc_and_map(&self, vmar: &Vmar) -> Result<()> { + pub(super) fn map_and_init_heap( + &self, + vmar: &Vmar, + data_segment_size: usize, + heap_base: Vaddr, + ) -> Result<()> { + let mut inner = self.inner.lock(); + + let nr_pages_padding = { + // Some random padding pages are added as a simple measure to + // make the heap values of a buggy user program harder + // to be exploited by attackers. + let mut nr_random_padding_pages: u8 = 0; + getrandom(nr_random_padding_pages.as_bytes_mut()); + + nr_random_padding_pages as usize + }; + + let heap_start = heap_base.align_up(PAGE_SIZE) + nr_pages_padding * PAGE_SIZE; + let vmar_map_options = { let perms = VmPerms::READ | VmPerms::WRITE; - vmar.new_map(PAGE_SIZE, perms).unwrap().offset(self.base) + vmar.new_map(PAGE_SIZE, perms).unwrap().offset(heap_start) }; vmar_map_options.build()?; - // If we touch another mapped range when we are trying to expand the - // heap, we fail. - // - // So a simple solution is to reserve enough space for the heap by - // mapping without any permissions and allow it to be overwritten - // later by `brk`. New mappings from `mmap` that overlaps this range - // may be moved to another place. 
- let vmar_reserve_options = { - let perms = VmPerms::empty(); - vmar.new_map(USER_HEAP_SIZE_LIMIT - PAGE_SIZE, perms) - .unwrap() - .offset(self.base + PAGE_SIZE) - }; - vmar_reserve_options.build()?; + debug_assert!(inner.is_none()); + *inner = Some(HeapInner { + data_segment_size, + heap_range: heap_start..heap_start + PAGE_SIZE, + }); - self.set_uninitialized(); Ok(()) } - /// Returns the current program break. - pub fn program_break(&self) -> Vaddr { - self.current_program_break.load(Ordering::Relaxed) + /// Returns the current heap end. + pub fn heap_end(&self) -> Vaddr { + let inner = self.inner.lock(); + let inner = inner.as_ref().expect("Heap is not initialized"); + inner.heap_range.end } - /// Sets the program break to `new_heap_end`. + /// Modifies the end address of the heap. /// - /// Returns the new program break on success, or the current program break on failure. + /// Returns the new heap end on success, or the current heap end on failure. /// This behavior is consistent with the Linux `brk` syscall. - pub fn set_program_break( + // + // Reference: + pub fn modify_heap_end( &self, - new_program_break: Vaddr, + new_heap_end: Vaddr, ctx: &Context, ) -> core::result::Result { let user_space = ctx.user_space(); let vmar = user_space.vmar(); - let current_program_break = self.current_program_break.load(Ordering::Acquire); + let mut inner = self.inner.lock(); + let inner = inner.as_mut().expect("Heap is not initialized"); - // According to the Linux source code, when the `brk` value is more than the - // rlimit, the `brk` syscall returns the current `brk` value. - // Reference: - if new_program_break > self.base + self.limit { - return Err(current_program_break); - } - if new_program_break < current_program_break { - // FIXME: should we allow shrink current user heap? - return Ok(current_program_break); + let heap_start = inner.heap_range.start; + let current_heap_end = inner.heap_range.end; + let new_heap_range = heap_start..new_heap_end; + + // Check if the new heap end is valid. + if new_heap_end < heap_start + || check_data_rlimit(inner.data_segment_size, &new_heap_range, ctx).is_err() + || new_heap_end.checked_add(PAGE_SIZE).is_none() + { + return Err(current_heap_end); } - let current_program_break_aligned = current_program_break.align_up(PAGE_SIZE); - let new_program_break_aligned = new_program_break.align_up(PAGE_SIZE); + let current_heap_end_aligned = current_heap_end.align_up(PAGE_SIZE); + let new_heap_end_aligned = new_heap_end.align_up(PAGE_SIZE); - // No need to expand the heap. - if new_program_break_aligned == current_program_break_aligned { - self.current_program_break - .store(new_program_break, Ordering::Release); - return Ok(new_program_break); + let old_size = current_heap_end_aligned - heap_start; + let new_size = new_heap_end_aligned - heap_start; + + // No change in the heap mapping. + if old_size == new_size { + inner.heap_range = new_heap_range; + return Ok(new_heap_end); } - // Remove the reserved space. - vmar.remove_mapping(current_program_break_aligned..new_program_break_aligned) - .map_err(|_| current_program_break)?; + // Because the mapped heap region may contain multiple mappings, which can be + // done by `mmap` syscall or other ways, we need to be careful when modifying + // the heap mapping. + // For simplicity, we set `check_single_mapping` to `true` to ensure that the + // heap region contains only a single mapping. 
+ vmar.resize_mapping(heap_start, old_size, new_size, true) + .map_err(|_| current_heap_end)?; - let old_size = current_program_break_aligned - self.base; - let new_size = new_program_break_aligned - self.base; - // Expand the heap. - vmar.resize_mapping(self.base, old_size, new_size, false) - .map_err(|_| current_program_break)?; - - self.current_program_break - .store(new_program_break, Ordering::Release); - Ok(new_program_break) - } - - pub(super) fn set_uninitialized(&self) { - self.current_program_break - .store(self.base + PAGE_SIZE, Ordering::Relaxed); + inner.heap_range = new_heap_range; + Ok(new_heap_end) } } impl Clone for Heap { fn clone(&self) -> Self { - let current_program_break = self.current_program_break.load(Ordering::Relaxed); Self { - base: self.base, - limit: self.limit, - current_program_break: AtomicUsize::new(current_program_break), + inner: Mutex::new(self.inner.lock().clone()), } } } -impl Default for Heap { - fn default() -> Self { - Self::new() +/// Checks whether the new heap range exceeds the data segment size limit. +// Reference: +fn check_data_rlimit( + data_segment_size: usize, + new_heap_range: &Range, + ctx: &Context, +) -> Result<()> { + let rlimit_data = ctx + .process + .resource_limits() + .get_rlimit(ResourceType::RLIMIT_DATA) + .get_cur(); + + if rlimit_data.saturating_sub(data_segment_size as u64) < new_heap_range.len() as u64 { + return_errno_with_message!(Errno::ENOSPC, "the data segment size limit is reached"); } + Ok(()) } diff --git a/kernel/src/process/process_vm/mod.rs b/kernel/src/process/process_vm/mod.rs index cb2bd1561..e06bb8d19 100644 --- a/kernel/src/process/process_vm/mod.rs +++ b/kernel/src/process/process_vm/mod.rs @@ -18,7 +18,7 @@ use core::sync::atomic::{AtomicUsize, Ordering}; use ostd::{sync::MutexGuard, task::disable_preempt}; pub use self::{ - heap::{Heap, USER_HEAP_SIZE_LIMIT}, + heap::Heap, init_stack::{ INIT_STACK_SIZE, InitStack, InitStackReader, MAX_LEN_STRING_ARG, MAX_NR_STRING_ARGS, aux_vec::{AuxKey, AuxVec}, @@ -28,8 +28,6 @@ use crate::{fs::path::Path, prelude::*, vm::vmar::Vmar}; /* * The user's virtual memory space layout looks like below. - * TODO: The layout of the userheap does not match the current implementation, - * And currently the initial program break is a fixed value. * * (high address) * +---------------------+ <------+ The top of Vmar, which is the highest address usable @@ -77,10 +75,10 @@ pub struct ProcessVm { impl ProcessVm { /// Creates a new `ProcessVm` without mapping anything. - fn new(executable_file: Path) -> Self { + pub(super) fn new(executable_file: Path) -> Self { Self { init_stack: InitStack::new(), - heap: Heap::new(), + heap: Heap::new_uninitialized(), executable_file, #[cfg(target_arch = "riscv64")] vdso_base: AtomicUsize::new(0), @@ -124,6 +122,17 @@ impl ProcessVm { self.init_stack().map_and_write(vmar, argv, envp, aux_vec) } + /// Maps and initializes the heap virtual memory. + pub(super) fn map_and_init_heap( + &self, + vmar: &Vmar, + data_segment_size: usize, + heap_base: Vaddr, + ) -> Result<()> { + self.heap() + .map_and_init_heap(vmar, data_segment_size, heap_base) + } + /// Returns the base address for vDSO segment. #[cfg(target_arch = "riscv64")] pub(super) fn vdso_base(&self) -> Vaddr { @@ -200,19 +209,6 @@ impl<'a> ProcessVmarGuard<'a> { } } -/// Creates a new VMAR and map the heap. -/// -/// This method should only be used to create a VMAR for the init process. 
-pub(super) fn new_vmar_and_map(executable_file: Path) -> Arc { - let new_vmar = Vmar::new(ProcessVm::new(executable_file)); - new_vmar - .process_vm() - .heap() - .alloc_and_map(new_vmar.as_ref()) - .unwrap(); - new_vmar -} - /// Activates the [`Vmar`] in the current process's context. pub(super) fn activate_vmar(ctx: &Context, new_vmar: Arc) { let mut vmar_guard = ctx.process.lock_vmar(); diff --git a/kernel/src/process/program_loader/elf/elf_file.rs b/kernel/src/process/program_loader/elf/elf_file.rs index f4c8ebc24..e25fa1b6a 100644 --- a/kernel/src/process/program_loader/elf/elf_file.rs +++ b/kernel/src/process/program_loader/elf/elf_file.rs @@ -168,6 +168,14 @@ impl ElfHeaders { .reduce(|r1, r2| r1.start.min(r2.start)..r1.end.max(r2.end)) .unwrap() } + + /// Finds the last loadable segment and returns its virtual address bounds. + pub(super) fn find_last_vaddr_bound(&self) -> Option> { + self.loadable_phdrs + .iter() + .max_by_key(|phdr| phdr.virt_range().end) + .map(|phdr| phdr.virt_range().clone()) + } } struct ElfHeader { diff --git a/kernel/src/process/program_loader/elf/load_elf.rs b/kernel/src/process/program_loader/elf/load_elf.rs index f0750f0c6..902ed8d98 100644 --- a/kernel/src/process/program_loader/elf/load_elf.rs +++ b/kernel/src/process/program_loader/elf/load_elf.rs @@ -64,7 +64,7 @@ pub fn load_elf_to_vmar( not(any(target_arch = "x86_64", target_arch = "riscv64")), expect(unused_mut) )] - let (_range, entry_point, mut aux_vec) = + let (elf_mapped_info, entry_point, mut aux_vec) = map_vmos_and_build_aux_vec(vmar, ldso, &elf_headers, elf_inode)?; // Map the vDSO and set the entry. @@ -81,6 +81,11 @@ pub fn load_elf_to_vmar( vmar.process_vm() .map_and_write_init_stack(vmar, argv, envp, aux_vec)?; + vmar.process_vm().map_and_init_heap( + vmar, + elf_mapped_info.data_segment_size, + elf_mapped_info.heap_base, + )?; let user_stack_top = vmar.process_vm().init_stack().user_stack_top(); Ok(ElfLoadInfo { @@ -132,26 +137,26 @@ fn lookup_and_parse_ldso( /// Maps the VMOs to the corresponding virtual memory addresses and builds the auxiliary vector. /// -/// Returns the mapped range, the entry point, and the auxiliary vector. +/// Returns the mapped information, the entry point, and the auxiliary vector. fn map_vmos_and_build_aux_vec( vmar: &Vmar, ldso: Option<(Path, ElfHeaders)>, parsed_elf: &ElfHeaders, elf_inode: &Arc, -) -> Result<(RelocatedRange, Vaddr, AuxVec)> { +) -> Result<(ElfMappedInfo, Vaddr, AuxVec)> { let ldso_load_info = if let Some((ldso_file, ldso_elf)) = ldso { Some(load_ldso(vmar, &ldso_file, &ldso_elf)?) } else { None }; - let elf_map_range = map_segment_vmos(parsed_elf, vmar, elf_inode, ldso_load_info.is_some())?; + let elf_mapped_info = map_segment_vmos(parsed_elf, vmar, elf_inode, ldso_load_info.is_some())?; let mut aux_vec = { let ldso_base = ldso_load_info .as_ref() .map(|load_info| load_info.range.relocated_start()); - init_aux_vec(parsed_elf, &elf_map_range, ldso_base)? + init_aux_vec(parsed_elf, &elf_mapped_info.full_range, ldso_base)? }; // Set AT_SECURE based on setuid/setgid bits of the executable file. @@ -166,7 +171,8 @@ fn map_vmos_and_build_aux_vec( let entry_point = if let Some(ldso_load_info) = ldso_load_info { ldso_load_info.entry_point } else { - elf_map_range + elf_mapped_info + .full_range .relocated_addr_of(parsed_elf.entry_point()) .ok_or_else(|| { Error::with_message( @@ -176,7 +182,7 @@ fn map_vmos_and_build_aux_vec( })? 
}; - Ok((elf_map_range, entry_point, aux_vec)) + Ok((elf_mapped_info, entry_point, aux_vec)) } struct LdsoLoadInfo { @@ -189,7 +195,8 @@ struct LdsoLoadInfo { } fn load_ldso(vmar: &Vmar, ldso_file: &Path, ldso_elf: &ElfHeaders) -> Result { - let range = map_segment_vmos(ldso_elf, vmar, ldso_file.inode(), false)?; + let elf_mapped_info = map_segment_vmos(ldso_elf, vmar, ldso_file.inode(), false)?; + let range = elf_mapped_info.full_range; let entry_point = range .relocated_addr_of(ldso_elf.entry_point()) .ok_or_else(|| { @@ -201,11 +208,22 @@ fn load_ldso(vmar: &Vmar, ldso_file: &Path, ldso_elf: &ElfHeaders) -> Result, has_interpreter: bool, -) -> Result { +) -> Result { let elf_va_range = elf.calc_total_vaddr_bounds(); + // The base address for the heap start. If it's `None`, we will use the end of ELF segments. + let mut heap_base = None; + let map_range = if elf.is_shared_object() { // Relocatable object. @@ -261,6 +282,16 @@ fn map_segment_vmos( .offset(offset) } else { // Static PIE program: pick an aligned address from the mmap region. + + // When executing static PIE programs, place the heap area away from the + // general mmap region and into the unused `PIE_BASE_ADDR` space. + // This helps avoid early collisions, since the heap grows upward while + // the stack grows downward, and other mappings (such as the vDSO) may + // also be placed in the mmap region. + // + // Reference: + heap_base = Some(PIE_BASE_ADDR); + vmar.new_map(map_size, VmPerms::empty())?.align(align) }; let aligned_range = vmar_map_options.build().map(|addr| addr..addr + map_size)?; @@ -312,7 +343,16 @@ fn map_segment_vmos( map_segment_vmo(loadable_phdr, &elf_vmo, vmar, map_at)?; } - Ok(relocated_range) + // Calculate the data segment size. + // According to Linux behavior, the data segment only includes the last loadable segment. + // Reference: + let data_segment_size = elf.find_last_vaddr_bound().map_or(0, |range| range.len()); + + Ok(ElfMappedInfo { + full_range: relocated_range, + data_segment_size, + heap_base: heap_base.unwrap_or(map_range.end), + }) } /// Creates and maps the segment VMO to the VMAR. 
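Note (illustrative sketch, not part of the patch): the heap-base and data-segment bookkeeping introduced above in load_elf.rs can be summarized in standalone form. `Phdr`, `heap_base`, and `data_segment_size` below are made-up names, and the non-PIE case is assumed (a static PIE instead uses `PIE_BASE_ADDR` as the heap base, as the patch comments explain); the real code derives the heap base from the end of the relocated mapping range and later adds random padding pages in `map_and_init_heap`.

// Illustrative sketch only; `Phdr` is a hypothetical stand-in for the kernel's
// loadable program header type, and the constants are simplified.
const PAGE_SIZE: usize = 4096;

struct Phdr {
    vaddr: usize,
    mem_size: usize,
}

fn align_up(addr: usize, align: usize) -> usize {
    (addr + align - 1) & !(align - 1)
}

/// Heap base: the page-aligned end of the highest loadable segment
/// (the patch additionally adds a random number of padding pages).
fn heap_base(phdrs: &[Phdr]) -> Option<usize> {
    phdrs
        .iter()
        .map(|p| p.vaddr + p.mem_size)
        .max()
        .map(|end| align_up(end, PAGE_SIZE))
}

/// Data segment size for RLIMIT_DATA accounting: the size of the last
/// loadable segment, following the Linux behavior cited in the patch.
fn data_segment_size(phdrs: &[Phdr]) -> usize {
    phdrs
        .iter()
        .max_by_key(|p| p.vaddr + p.mem_size)
        .map_or(0, |p| p.mem_size)
}

fn main() {
    let phdrs = [
        Phdr { vaddr: 0x40_0000, mem_size: 0x3000 }, // text
        Phdr { vaddr: 0x40_4000, mem_size: 0x2345 }, // data + bss
    ];
    assert_eq!(heap_base(&phdrs), Some(0x40_7000)); // 0x40_6345 rounded up
    assert_eq!(data_segment_size(&phdrs), 0x2345);
}
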
diff --git a/kernel/src/process/rlimit.rs b/kernel/src/process/rlimit.rs index 1605bca87..03b0462dd 100644 --- a/kernel/src/process/rlimit.rs +++ b/kernel/src/process/rlimit.rs @@ -6,7 +6,7 @@ use core::{ sync::atomic::{AtomicU64, Ordering}, }; -use super::process_vm::{INIT_STACK_SIZE, USER_HEAP_SIZE_LIMIT}; +use super::process_vm::INIT_STACK_SIZE; use crate::{ prelude::*, process::{UserNamespace, credentials::capabilities::CapSet}, @@ -48,8 +48,7 @@ impl Default for ResourceLimits { // Sets the resource limits with predefined values rlimits[ResourceType::RLIMIT_CPU as usize] = RLimit64::new(RLIM_INFINITY, RLIM_INFINITY); rlimits[ResourceType::RLIMIT_FSIZE as usize] = RLimit64::new(RLIM_INFINITY, RLIM_INFINITY); - rlimits[ResourceType::RLIMIT_DATA as usize] = - RLimit64::new(USER_HEAP_SIZE_LIMIT as u64, RLIM_INFINITY); + rlimits[ResourceType::RLIMIT_DATA as usize] = RLimit64::new(RLIM_INFINITY, RLIM_INFINITY); rlimits[ResourceType::RLIMIT_STACK as usize] = RLimit64::new(INIT_STACK_SIZE as u64, RLIM_INFINITY); rlimits[ResourceType::RLIMIT_CORE as usize] = RLimit64::new(0, RLIM_INFINITY); diff --git a/kernel/src/syscall/brk.rs b/kernel/src/syscall/brk.rs index e2d66e668..758cba229 100644 --- a/kernel/src/syscall/brk.rs +++ b/kernel/src/syscall/brk.rs @@ -10,15 +10,16 @@ pub fn sys_brk(heap_end: u64, ctx: &Context) -> Result { Some(heap_end as usize) }; debug!("new heap end = {:x?}", heap_end); + let user_space = ctx.user_space(); let user_heap = user_space.vmar().process_vm().heap(); - let syscall_ret = match new_heap_end { + let current_heap_end = match new_heap_end { Some(addr) => user_heap - .set_program_break(addr, ctx) - .unwrap_or_else(|current_break| current_break), - None => user_heap.program_break(), + .modify_heap_end(addr, ctx) + .unwrap_or_else(|cur_heap_end| cur_heap_end), + None => user_heap.heap_end(), }; - Ok(SyscallReturn::Return(syscall_ret as _)) + Ok(SyscallReturn::Return(current_heap_end as _)) } diff --git a/kernel/src/vm/vmar/vmar_impls/mod.rs b/kernel/src/vm/vmar/vmar_impls/mod.rs index 9759593de..02dc5386f 100644 --- a/kernel/src/vm/vmar/vmar_impls/mod.rs +++ b/kernel/src/vm/vmar/vmar_impls/mod.rs @@ -24,7 +24,7 @@ use super::{ }; use crate::{ prelude::*, - process::{Process, ProcessVm, ResourceType}, + process::{INIT_STACK_SIZE, Process, ProcessVm, ResourceType}, vm::vmar::is_userspace_vaddr_range, }; @@ -304,47 +304,59 @@ impl VmarInner { Ok(offset..(offset + size)) } - /// Allocates a free region for mapping. + /// Allocates a free region for mapping, searching from high address to low address. /// /// If no such region is found, return an error. fn alloc_free_region(&mut self, size: usize, align: usize) -> Result> { - // Fast path that there's still room to the end. - let highest_occupied = self - .vm_mappings - .iter() - .next_back() - .map_or(VMAR_LOWEST_ADDR, |vm_mapping| vm_mapping.range().end); - // FIXME: The up-align may overflow. - let last_occupied_aligned = highest_occupied.align_up(align); - if let Some(last) = last_occupied_aligned.checked_add(size) - && last <= VMAR_CAP_ADDR - { - return Ok(last_occupied_aligned..last); + // This value represents the highest possible address for a new mapping. + // For simplicity, we use a fixed value `2048` here. The value contains the following considerations: + // - The stack fixed padding size. + // - The stack random padding size. + // - The future growth of the stack. + // FIXME: This value should consider the process's actual stack configuration, which may + // exist in `ResourceLimits`. 
+ let high_limit = VMAR_CAP_ADDR - INIT_STACK_SIZE - PAGE_SIZE * 2048; + let low_limit = VMAR_LOWEST_ADDR; + + fn try_alloc_in_hole( + hole_start: Vaddr, + hole_end: Vaddr, + size: usize, + align: usize, + ) -> Option> { + let start = hole_end.checked_sub(size)?.align_down(align); + if start >= hole_start { + Some(start..start + size) + } else { + None + } } - // Slow path that we need to search for a free region. - // Here, we use a simple brute-force FIRST-FIT algorithm. - // Allocate as low as possible to reduce fragmentation. - let mut last_end: Vaddr = VMAR_LOWEST_ADDR; - for vm_mapping in self.vm_mappings.iter() { - let range = vm_mapping.range(); + let mut prev_vm_mapping_start = high_limit; + for vm_mapping in self.vm_mappings.iter().rev() { + let hole_start = vm_mapping.range().end.max(low_limit); + let hole_end = prev_vm_mapping_start.min(high_limit); - debug_assert!(range.start >= last_end); - debug_assert!(range.end <= highest_occupied); - - let last_aligned = last_end.align_up(align); - let needed_end = last_aligned - .checked_add(size) - .ok_or(Error::new(Errno::ENOMEM))?; - - if needed_end <= range.start { - return Ok(last_aligned..needed_end); + if let Some(region) = try_alloc_in_hole(hole_start, hole_end, size, align) { + return Ok(region); } - last_end = range.end; + prev_vm_mapping_start = vm_mapping.range().start; + if prev_vm_mapping_start <= low_limit { + break; + } } - return_errno_with_message!(Errno::ENOMEM, "Cannot find free region for mapping"); + // Check the hole between `low_limit` and the lowest mapping. + if prev_vm_mapping_start > low_limit { + let hole_start = low_limit; + let hole_end = prev_vm_mapping_start.min(high_limit); + if let Some(region) = try_alloc_in_hole(hole_start, hole_end, size, align) { + return Ok(region); + } + } + + return_errno_with_message!(Errno::ENOMEM, "no free region for mapping can be found"); } /// Splits and unmaps the found mapping if the new size is smaller.
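
For context on the vmar_impls change: `alloc_free_region` now searches for free regions from high addresses down, below a window reserved for the stack, instead of the old bottom-up first-fit. The sketch below is a minimal standalone illustration of that hole-walking idea, assuming a start-sorted list of mapped ranges; `alloc_top_down`, `align_down`, and the example addresses are hypothetical and simplify away the kernel's `VmarInner` bookkeeping.

// Illustrative sketch only; not the `VmarInner` API.
use core::ops::Range;

fn align_down(addr: usize, align: usize) -> usize {
    addr & !(align - 1)
}

/// Finds the highest-addressed gap of `size` bytes within `[low_limit, high_limit)`,
/// walking the (start-sorted) existing mappings from high to low.
fn alloc_top_down(
    mappings: &[Range<usize>],
    low_limit: usize,
    high_limit: usize,
    size: usize,
    align: usize,
) -> Option<Range<usize>> {
    let mut hole_end = high_limit;
    for m in mappings.iter().rev() {
        // The gap between this mapping's end and the previous mapping's start.
        let hole_start = m.end.max(low_limit);
        let capped_end = hole_end.min(high_limit);
        if let Some(start) = capped_end.checked_sub(size).map(|s| align_down(s, align)) {
            if start >= hole_start {
                return Some(start..start + size);
            }
        }
        hole_end = m.start;
        if hole_end <= low_limit {
            return None;
        }
    }
    // The gap below the lowest mapping (or the whole window if there are none).
    let start = align_down(hole_end.min(high_limit).checked_sub(size)?, align);
    (start >= low_limit).then(|| start..start + size)
}

fn main() {
    let mappings = [0x1000..0x3000, 0x8000..0x9000];
    // The highest fitting gap below 0x1_0000 is 0xe000..0x1_0000.
    assert_eq!(
        alloc_top_down(&mappings, 0x1000, 0x1_0000, 0x2000, 0x1000),
        Some(0xe000..0x1_0000)
    );
}

Allocating top-down keeps fresh `mmap` regions near the stack end of the address space, leaving the low area free for the heap, which now starts right after the bss segment and grows upward.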