// SPDX-License-Identifier: MPL-2.0

//! ELF file parser.

use align_ext::AlignExt;

use super::{
    elf_file::{ElfHeaders, LoadablePhdr},
    relocate::RelocatedRange,
};
use crate::{
    fs::path::{FsPath, Path, PathResolver},
    prelude::*,
    process::{
        process_vm::{AuxKey, AuxVec},
        program_loader::check_executable_inode,
    },
    util::random::getrandom,
    vm::{
        perms::VmPerms,
        vmar::{VMAR_CAP_ADDR, VMAR_LOWEST_ADDR, Vmar},
    },
};

/// The base address for PIE (ET_DYN with INTERP) loading.
///
/// Linux calls this `ELF_ET_DYN_BASE`. It serves two purposes:
/// - The base load address for PIE programs (ET_DYN with INTERP).
/// - The heap start address for static PIE programs (ET_DYN without INTERP).
///
/// References:
/// - x86_64: ELF_ET_DYN_BASE = DEFAULT_MAP_WINDOW / 3 * 2
/// - riscv64: ELF_ET_DYN_BASE = (DEFAULT_MAP_WINDOW / 3) * 2
/// - loongarch64: ELF_ET_DYN_BASE = TASK_SIZE / 3 * 2
const PIE_BASE_ADDR: Vaddr = VMAR_CAP_ADDR / 3 * 2;

pub struct ElfLoadInfo {
    /// The relocated entry point.
    pub entry_point: Vaddr,
    /// The top address of the user stack.
    pub user_stack_top: Vaddr,
}

/// Loads an ELF file to the process VMAR.
///
/// This function will map ELF segments and
/// initialize the init stack and heap.
pub fn load_elf_to_vmar(
    vmar: &Vmar,
    elf_file: Path,
    path_resolver: &PathResolver,
    elf_headers: ElfHeaders,
    argv: Vec<CString>,
    envp: Vec<CString>,
) -> Result<ElfLoadInfo> {
    let ldso = lookup_and_parse_ldso(&elf_headers, &elf_file, path_resolver)?;

    #[cfg_attr(
        not(any(target_arch = "x86_64", target_arch = "riscv64")),
        expect(unused_mut)
    )]
    let (elf_mapped_info, entry_point, mut aux_vec) =
        map_vmos_and_build_aux_vec(vmar, ldso, &elf_headers, &elf_file)?;

    // Map the vDSO and set the entry.
    // Since the vDSO does not require being mapped to any specific address,
    // the vDSO is mapped after the ELF file, heap, and stack.
    #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
    if let Some(vdso_text_base) = map_vdso_to_vmar(vmar) {
        #[cfg(target_arch = "riscv64")]
        vmar.process_vm().set_vdso_base(vdso_text_base);

        aux_vec.set(AuxKey::AT_SYSINFO_EHDR, vdso_text_base as u64);
    }

    vmar.process_vm()
        .map_and_write_init_stack(vmar, argv, envp, aux_vec)?;
    vmar.process_vm().map_and_init_heap(
        vmar,
        elf_mapped_info.data_segment_size,
        elf_mapped_info.heap_base,
    )?;

    let user_stack_top = vmar.process_vm().init_stack().user_stack_top();
    Ok(ElfLoadInfo {
        entry_point,
        user_stack_top,
    })
}

fn lookup_and_parse_ldso(
    headers: &ElfHeaders,
    elf_file: &Path,
    path_resolver: &PathResolver,
) -> Result<Option<(Path, ElfHeaders)>> {
    let ldso_file = {
        let ldso_path = if let Some(interp_phdr) = headers.interp_phdr() {
            interp_phdr.read_ldso_path(elf_file.inode())?
        } else {
            return Ok(None);
        };
        // Our FS requires the path to be valid UTF-8. This may be too restrictive.
        let ldso_path = ldso_path.into_string().map_err(|_| {
            Error::with_message(
                Errno::ENOEXEC,
                "the interpreter path is not a valid UTF-8 string",
            )
        })?;
        let fs_path = FsPath::try_from(ldso_path.as_str())?;
        path_resolver.lookup(&fs_path)?
    };

    let ldso_elf = {
        let inode = ldso_file.inode();
        check_executable_inode(inode.as_ref())?;

        let mut buf = Box::new([0u8; PAGE_SIZE]);
        let len = inode.read_bytes_at(0, &mut *buf)?;
        if len < ElfHeaders::LEN {
            return_errno_with_message!(Errno::EIO, "the interpreter format is invalid");
        }
        ElfHeaders::parse(&buf[..len])
            .map_err(|_| Error::with_message(Errno::ELIBBAD, "the interpreter format is invalid"))?
    };

    Ok(Some((ldso_file, ldso_elf)))
}
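
// A minimal compile-time sketch of the `PIE_BASE_ADDR` arithmetic above. The
// cap value here is hypothetical (a 47-bit user address space); the real
// `VMAR_CAP_ADDR` depends on the target architecture.
const _: () = {
    const HYPOTHETICAL_CAP: usize = 0x8000_0000_0000;
    const PIE_BASE: usize = HYPOTHETICAL_CAP / 3 * 2;
    // Two thirds of the cap, rounded down by the integer division.
    assert!(PIE_BASE == 0x5555_5555_5554);
    assert!(PIE_BASE < HYPOTHETICAL_CAP);
};
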
/// Maps the VMOs to the corresponding virtual memory addresses and builds the auxiliary vector.
///
/// Returns the mapped information, the entry point, and the auxiliary vector.
fn map_vmos_and_build_aux_vec(
    vmar: &Vmar,
    ldso: Option<(Path, ElfHeaders)>,
    parsed_elf: &ElfHeaders,
    elf_file: &Path,
) -> Result<(ElfMappedInfo, Vaddr, AuxVec)> {
    let ldso_load_info = if let Some((ldso_file, ldso_elf)) = ldso {
        Some(load_ldso(vmar, &ldso_file, &ldso_elf)?)
    } else {
        None
    };

    let elf_mapped_info = map_segment_vmos(parsed_elf, vmar, elf_file, ldso_load_info.is_some())?;

    let mut aux_vec = {
        let ldso_base = ldso_load_info
            .as_ref()
            .map(|load_info| load_info.range.relocated_start());
        init_aux_vec(parsed_elf, &elf_mapped_info.full_range, ldso_base)?
    };

    // Set AT_SECURE based on the setuid/setgid bits of the executable file.
    let mode = elf_file.inode().mode()?;
    let secure = if mode.has_set_uid() || mode.has_set_gid() {
        1
    } else {
        0
    };
    aux_vec.set(AuxKey::AT_SECURE, secure);

    let entry_point = if let Some(ldso_load_info) = ldso_load_info {
        ldso_load_info.entry_point
    } else {
        elf_mapped_info
            .full_range
            .relocated_addr_of(parsed_elf.entry_point())
            .ok_or_else(|| {
                Error::with_message(
                    Errno::ENOEXEC,
                    "the entry point is not located in any segments",
                )
            })?
    };

    Ok((elf_mapped_info, entry_point, aux_vec))
}

struct LdsoLoadInfo {
    /// The relocated entry point.
    entry_point: Vaddr,
    /// The range covering all the mapped segments.
    ///
    /// Note that the range may not be page-aligned.
    range: RelocatedRange,
}

fn load_ldso(vmar: &Vmar, ldso_file: &Path, ldso_elf: &ElfHeaders) -> Result<LdsoLoadInfo> {
    let elf_mapped_info = map_segment_vmos(ldso_elf, vmar, ldso_file, false)?;
    let range = elf_mapped_info.full_range;

    let entry_point = range
        .relocated_addr_of(ldso_elf.entry_point())
        .ok_or_else(|| {
            Error::with_message(
                Errno::ENOEXEC,
                "the entry point is not located in any segments",
            )
        })?;

    Ok(LdsoLoadInfo { entry_point, range })
}

/// Information about the mapped ELF segments.
struct ElfMappedInfo {
    /// The range covering all the mapped segments.
    full_range: RelocatedRange,
    /// The size of the data segment.
    data_segment_size: usize,
    /// The base address for the heap start.
    heap_base: Vaddr,
}

/// Initializes a [`Vmo`] for each segment and then maps it to the [`Vmar`].
///
/// This function will return the mapped information, which contains the
/// mapped range that covers all segments. The range is tight, i.e., it
/// does not include any padding bytes, so its boundaries may not be
/// page-aligned.
///
/// [`Vmo`]: crate::vm::vmo::Vmo
fn map_segment_vmos(
    elf: &ElfHeaders,
    vmar: &Vmar,
    elf_file: &Path,
    has_interpreter: bool,
) -> Result<ElfMappedInfo> {
    let elf_va_range = elf.calc_total_vaddr_bounds();

    // The base address for the heap start. If it's `None`, we will use the end of ELF segments.
    let mut heap_base = None;

    let map_range = if elf.is_shared_object() {
        // Relocatable object.

        let align = elf.max_load_align();
        // Given that `elf_va_range` is guaranteed to be below `VMAR_CAP_ADDR`, as long as
        // `VMAR_CAP_ADDR * 2` does not overflow, the following `align_up(align)` cannot overflow
        // either.
        const { assert!(VMAR_CAP_ADDR.checked_mul(2).is_some()) };

        // Allocate a contiguous range of virtual memory for all segments in advance.
        //
        // All segments in the ELF program must be mapped to a contiguous VM range to
        // ensure that the relative offsets between segments remain unchanged.
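        //
        // For example, with `align = 0x200000` (2 MiB) and segments spanning
        // `0x1234..0x205678`, the aligned range computed below would be
        // `0x0..0x400000`; reserving the whole range keeps each segment's
        // offset relative to the range start unchanged. (Illustrative
        // numbers only.)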
        let elf_va_range_aligned =
            elf_va_range.start.align_down(align)..elf_va_range.end.align_up(align);
        let map_size = elf_va_range_aligned.len();

        // There are effectively two types of ET_DYN ELF binaries:
        //  - PIE programs (ET_DYN with PT_INTERP) and
        //  - static PIE programs (ET_DYN without PT_INTERP, usually the ELF interpreter itself).
        //
        // Reference:
        let vmar_map_options = if has_interpreter {
            // PIE program: map near a dedicated base.

            // Add some random padding.
            let nr_pages_padding = {
                let mut nr_random_padding_pages: u8 = 0;
                getrandom(nr_random_padding_pages.as_bytes_mut())?;
                nr_random_padding_pages as usize
            };

            let offset = (PIE_BASE_ADDR + nr_pages_padding * PAGE_SIZE).align_down(align);
            if offset < VMAR_LOWEST_ADDR {
                return_errno_with_message!(Errno::EPERM, "the mapping address is too small");
            }
            if VMAR_CAP_ADDR - offset < map_size {
                return_errno_with_message!(Errno::ENOMEM, "the mapping address is too large");
            }

            vmar.new_map(map_size, VmPerms::empty())?
                .align(align)
                .offset(offset)
        } else {
            // Static PIE program: pick an aligned address from the mmap region.

            // When executing static PIE programs, place the heap area away from the
            // general mmap region and into the unused `PIE_BASE_ADDR` space.
            // This helps avoid early collisions, since the heap grows upward while
            // the stack grows downward, and other mappings (such as the vDSO) may
            // also be placed in the mmap region.
            //
            // Reference:
            heap_base = Some(PIE_BASE_ADDR);

            vmar.new_map(map_size, VmPerms::empty())?.align(align)
        };
        let aligned_range = vmar_map_options.build().map(|addr| addr..addr + map_size)?;

        // After acquiring a suitable range, we can remove the mapping and then
        // map each segment at the desired address.
        vmar.remove_mapping(aligned_range.clone())?;

        let start_offset = elf_va_range.start - elf_va_range_aligned.start;
        let end_offset = elf_va_range_aligned.end - elf_va_range.end;
        aligned_range.start + start_offset..aligned_range.end - end_offset
    } else {
        // Not a relocatable object. Map as-is.

        if elf_va_range.start < VMAR_LOWEST_ADDR {
            return_errno_with_message!(Errno::EPERM, "the mapping address is too small");
        }

        // Allocate a contiguous range of virtual memory for all segments in advance.
        //
        // This is to ensure that the range does not conflict with other objects, such
        // as the interpreter.
        let elf_va_range_aligned =
            elf_va_range.start.align_down(PAGE_SIZE)..elf_va_range.end.align_up(PAGE_SIZE);
        let map_size = elf_va_range_aligned.len();

        vmar.new_map(map_size, VmPerms::empty())?
            .offset(elf_va_range_aligned.start)
            .build()?;

        // After acquiring a suitable range, we can remove the mapping and then
        // map each segment at the desired address.
        vmar.remove_mapping(elf_va_range_aligned.clone())?;

        elf_va_range.clone()
    };

    let relocated_range = RelocatedRange::new(elf_va_range, map_range.start)
        .expect("`map_range` should not overflow");

    for loadable_phdr in elf.loadable_phdrs() {
        let map_at = relocated_range
            .relocated_addr_of(loadable_phdr.virt_range().start)
            .expect("`calc_total_vaddr_bounds()` should cover all segments");
        map_segment_vmo(loadable_phdr, elf_file, vmar, map_at)?;
    }

    // Calculate the data segment size.
    // According to Linux behavior, the data segment only includes the last loadable segment.
    // Reference:
    let data_segment_size = elf.find_last_vaddr_bound().map_or(0, |range| range.len());

    Ok(ElfMappedInfo {
        full_range: relocated_range,
        data_segment_size,
        heap_base: heap_base.unwrap_or(map_range.end),
    })
}
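
// A minimal compile-time sketch of how the tight (unaligned) range is
// recovered from the aligned reservation in `map_segment_vmos` above.
// All addresses are illustrative.
const _: () = {
    // The ELF's own (unaligned) virtual address bounds...
    const ELF_START: usize = 0x1234;
    const ELF_END: usize = 0x5678;
    // ...aligned outward before reserving the region...
    const ALIGNED_START: usize = 0x1000;
    const ALIGNED_END: usize = 0x6000;
    // ...and suppose the reservation landed at this base address.
    const MAP_BASE: usize = 0x7000_0000;
    const MAP_END: usize = MAP_BASE + (ALIGNED_END - ALIGNED_START);

    const TIGHT_START: usize = MAP_BASE + (ELF_START - ALIGNED_START);
    const TIGHT_END: usize = MAP_END - (ALIGNED_END - ELF_END);

    // The tight range preserves the original length and in-page offsets.
    assert!(TIGHT_END - TIGHT_START == ELF_END - ELF_START);
    assert!(TIGHT_START % 0x1000 == ELF_START % 0x1000);
};
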
/// Creates and maps the segment VMO to the VMAR.
///
/// Additional anonymous mappings will be created to represent trailing bytes, if any. For example,
/// this applies to the `.bss` segment.
fn map_segment_vmo(
    loadable_phdr: &LoadablePhdr,
    elf_file: &Path,
    vmar: &Vmar,
    map_at: Vaddr,
) -> Result<()> {
    let Some(elf_vmo) = elf_file.inode().page_cache() else {
        return_errno_with_message!(Errno::ENOEXEC, "the executable has no page cache");
    };

    let virt_range = loadable_phdr.virt_range();
    let file_range = loadable_phdr.file_range();

    trace!(
        "ELF segment: virt_range = {:#x?}, file_range = {:#x?}",
        virt_range, file_range,
    );

    let total_map_size = {
        let vmap_start = virt_range.start.align_down(PAGE_SIZE);
        let vmap_end = virt_range.end.align_up(PAGE_SIZE);
        vmap_end - vmap_start
    };
    let (segment_offset, segment_size) = {
        let start = file_range.start.align_down(PAGE_SIZE);
        let end = file_range.end.align_up(PAGE_SIZE);
        (start, end - start)
    };

    let perms = loadable_phdr.vm_perms();
    let offset = map_at.align_down(PAGE_SIZE);

    if segment_size != 0 {
        let mut vm_map_options = vmar
            .new_map(segment_size, perms)?
            .vmo(elf_vmo.clone())
            .path(elf_file.clone())
            .vmo_offset(segment_offset)
            .can_overwrite(true);
        vm_map_options = vm_map_options.offset(offset).handle_page_faults_around();
        let map_addr = vm_map_options.build()?;

        // Write zeros as padding if the tail is not page-aligned and the map
        // size is larger than the file size (e.g., for `.bss`). The mapping
        // is by default private, so the writes will trigger copy-on-write.
        // Ignore errors if the permissions do not allow writing.
        // Reference:
        let vaddr_to_zero = map_addr + (file_range.end - segment_offset);
        let size_to_zero = map_addr + segment_size - vaddr_to_zero;
        if size_to_zero != 0 {
            let res = vmar.fill_zeros_remote(vaddr_to_zero, size_to_zero);
            if let Err((err, _)) = res
                && perms.contains(VmPerms::WRITE)
            {
                return Err(err);
            }
        }
    }

    let anonymous_map_size = total_map_size - segment_size;
    if anonymous_map_size > 0 {
        let mut anonymous_map_options =
            vmar.new_map(anonymous_map_size, perms)?.can_overwrite(true);
        anonymous_map_options = anonymous_map_options.offset(offset + segment_size);
        anonymous_map_options.build()?;
    }

    Ok(())
}

fn init_aux_vec(
    elf: &ElfHeaders,
    elf_map_range: &RelocatedRange,
    ldso_base: Option<Vaddr>,
) -> Result<AuxVec> {
    let mut aux_vec = AuxVec::new();
    aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _);

    let Some(ph_vaddr) = elf_map_range.relocated_addr_of(elf.find_vaddr_of_phdrs()?) else {
        return_errno_with_message!(
            Errno::ENOEXEC,
            "the ELF program headers are not located in any segments"
        );
    };
    aux_vec.set(AuxKey::AT_PHDR, ph_vaddr as u64);
    aux_vec.set(AuxKey::AT_PHNUM, elf.ph_count() as u64);
    aux_vec.set(AuxKey::AT_PHENT, elf.ph_ent() as u64);

    let Some(entry_vaddr) = elf_map_range.relocated_addr_of(elf.entry_point()) else {
        return_errno_with_message!(
            Errno::ENOEXEC,
            "the entry point is not located in any segments"
        );
    };
    aux_vec.set(AuxKey::AT_ENTRY, entry_vaddr as u64);

    if let Some(ldso_base) = ldso_base {
        aux_vec.set(AuxKey::AT_BASE, ldso_base as u64);
    }

    Ok(aux_vec)
}
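
// A minimal compile-time sketch of the `.bss` zeroing arithmetic in
// `map_segment_vmo` above, with illustrative numbers. The file contents end
// mid-page, so the remainder of the last file-backed page must read as zeros.
const _: () = {
    const PAGE: usize = 0x1000;
    const FILE_START: usize = 0x2000;
    const FILE_END: usize = 0x2a00;
    // `FILE_START` aligned down and `FILE_END` aligned up to page boundaries.
    const SEGMENT_OFFSET: usize = 0x2000;
    const SEGMENT_SIZE: usize = 0x1000;
    assert!(SEGMENT_OFFSET == FILE_START / PAGE * PAGE);
    assert!(SEGMENT_OFFSET + SEGMENT_SIZE == (FILE_END + PAGE - 1) / PAGE * PAGE);

    const MAP_ADDR: usize = 0x8000_0000;
    const VADDR_TO_ZERO: usize = MAP_ADDR + (FILE_END - SEGMENT_OFFSET);
    const SIZE_TO_ZERO: usize = MAP_ADDR + SEGMENT_SIZE - VADDR_TO_ZERO;

    // The zeroed span starts right after the file contents and runs to the
    // end of the last file-backed page.
    assert!(VADDR_TO_ZERO == MAP_ADDR + 0xa00);
    assert!(SIZE_TO_ZERO == 0x600);
    assert!((VADDR_TO_ZERO + SIZE_TO_ZERO) % PAGE == 0);
};
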
/// Maps the vDSO VMO to the corresponding virtual memory address.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
fn map_vdso_to_vmar(vmar: &Vmar) -> Option<Vaddr> {
    use crate::vdso::{VDSO_VMO_LAYOUT, vdso_vmo};

    let vdso_vmo = vdso_vmo()?;

    let options = vmar
        .new_map(VDSO_VMO_LAYOUT.size, VmPerms::empty())
        .unwrap()
        .vmo(vdso_vmo);
    let vdso_vmo_base = options.build().unwrap();

    let vdso_data_base = vdso_vmo_base + VDSO_VMO_LAYOUT.data_segment_offset;
    let vdso_text_base = vdso_vmo_base + VDSO_VMO_LAYOUT.text_segment_offset;

    let data_perms = VmPerms::READ;
    let text_perms = VmPerms::READ | VmPerms::EXEC;
    vmar.protect(
        data_perms,
        vdso_data_base..(vdso_data_base + VDSO_VMO_LAYOUT.data_segment_size),
    )
    .unwrap();
    vmar.protect(
        text_perms,
        vdso_text_base..(vdso_text_base + VDSO_VMO_LAYOUT.text_segment_size),
    )
    .unwrap();

    Some(vdso_text_base)
}
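
// A minimal compile-time sketch of the `AT_SECURE` convention used in
// `map_vmos_and_build_aux_vec` above: the flag is 1 exactly when the
// executable has its setuid or setgid bit set.
const _: () = {
    const fn at_secure(has_set_uid: bool, has_set_gid: bool) -> u64 {
        if has_set_uid || has_set_gid { 1 } else { 0 }
    }
    assert!(at_secure(false, false) == 0);
    assert!(at_secure(true, false) == 1);
    assert!(at_secure(false, true) == 1);
};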