diff --git a/regression/Makefile b/regression/Makefile
index 0736f9bc6..745c9e5a0 100644
--- a/regression/Makefile
+++ b/regression/Makefile
@@ -31,6 +31,12 @@ $(INITRAMFS)/lib/x86_64-linux-gnu:
 	@cp -L /lib/x86_64-linux-gnu/libm.so.6 $@
 	@cp -L /lib/x86_64-linux-gnu/libgcc_s.so.1 $@
 	@cp -L /lib/x86_64-linux-gnu/libpthread.so.0 $@
+	@# TODO: use a custom compiled vdso.so file in the future.
+	@git clone https://github.com/jinzhao-dev/linux_vdso.git
+	@cd ./linux_vdso && git checkout 2a6d2db 2>/dev/null
+	@cp -L ./linux_vdso/vdso64.so $@
+	@rm -rf ./linux_vdso
+
 
 $(INITRAMFS)/lib64:
 	@mkdir -p $@
diff --git a/regression/syscall_test/Makefile b/regression/syscall_test/Makefile
index 385448682..9b864904f 100644
--- a/regression/syscall_test/Makefile
+++ b/regression/syscall_test/Makefile
@@ -1,4 +1,4 @@
-TESTS ?= open_test read_test statfs_test chmod_test pty_test uidgid_test
+TESTS ?= open_test read_test statfs_test chmod_test pty_test uidgid_test vdso_clock_gettime_test
 
 MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
 CUR_DIR := $(patsubst %/,%,$(dir $(MKFILE_PATH)))
diff --git a/services/libs/jinux-std/src/lib.rs b/services/libs/jinux-std/src/lib.rs
index 44e64e36d..b79b0e1b3 100644
--- a/services/libs/jinux-std/src/lib.rs
+++ b/services/libs/jinux-std/src/lib.rs
@@ -55,6 +55,7 @@ pub mod syscall;
 pub mod thread;
 pub mod time;
 mod util;
+pub(crate) mod vdso;
 pub mod vm;
 
 pub fn init() {
@@ -63,6 +64,7 @@ pub fn init() {
     sched::init();
     fs::rootfs::init(boot::initramfs()).unwrap();
     device::init().unwrap();
+    vdso::init();
 }
 
 fn init_thread() {
diff --git a/services/libs/jinux-std/src/process/program_loader/elf/aux_vec.rs b/services/libs/jinux-std/src/process/program_loader/elf/aux_vec.rs
index d81e3e1a6..228c8f1f4 100644
--- a/services/libs/jinux-std/src/process/program_loader/elf/aux_vec.rs
+++ b/services/libs/jinux-std/src/process/program_loader/elf/aux_vec.rs
@@ -46,6 +46,7 @@ pub enum AuxKey {
     /* 28...30 not used */
     AT_EXECFN = 31, /* filename of program */
     AT_SYSINFO = 32,
+    AT_SYSINFO_EHDR = 33, /* the start address of the page containing the VDSO */
 }
 
 impl AuxKey {
diff --git a/services/libs/jinux-std/src/process/program_loader/elf/init_stack.rs b/services/libs/jinux-std/src/process/program_loader/elf/init_stack.rs
index 83159f676..2f1ce7861 100644
--- a/services/libs/jinux-std/src/process/program_loader/elf/init_stack.rs
+++ b/services/libs/jinux-std/src/process/program_loader/elf/init_stack.rs
@@ -323,7 +323,7 @@ impl InitStack {
     }
 }
 
-pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr) -> Result<AuxVec> {
+pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr, vdso_text_base: Vaddr) -> Result<AuxVec> {
     let mut aux_vec = AuxVec::new();
     aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _)?;
     let ph_addr = if elf.is_shared_object() {
@@ -341,6 +341,7 @@ pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr) -> Result<AuxVec> {
         elf.entry_point()
     };
     aux_vec.set(AuxKey::AT_ENTRY, elf_entry as u64)?;
+    aux_vec.set(AuxKey::AT_SYSINFO_EHDR, vdso_text_base as u64)?;
     Ok(aux_vec)
 }
 
diff --git a/services/libs/jinux-std/src/process/program_loader/elf/load_elf.rs b/services/libs/jinux-std/src/process/program_loader/elf/load_elf.rs
index 7e3d6d9e9..d669ab3bd 100644
--- a/services/libs/jinux-std/src/process/program_loader/elf/load_elf.rs
+++ b/services/libs/jinux-std/src/process/program_loader/elf/load_elf.rs
@@ -31,6 +31,7 @@ pub fn load_elf_to_vm(
     fs_resolver: &FsResolver,
     argv: Vec<CString>,
     envp: Vec<CString>,
+    vdso_text_base: Vaddr,
 ) -> Result<ElfLoadInfo> {
     let elf = Elf::parse_elf(file_header)?;
 
@@ -40,9 +41,15 @@ pub fn load_elf_to_vm(
         None
     };
 
-    process_vm.clear();
-
-    match init_and_map_vmos(process_vm, ldso, &elf, &elf_file, argv, envp) {
+    match init_and_map_vmos(
+        process_vm,
+        ldso,
+        &elf,
+        &elf_file,
+        argv,
+        envp,
+        vdso_text_base,
+    ) {
         Ok(elf_load_info) => Ok(elf_load_info),
         Err(e) => {
             // Since the process_vm is cleared, the process cannot return to user space again,
@@ -91,6 +98,7 @@ fn init_and_map_vmos(
     elf_file: &Dentry,
     argv: Vec<CString>,
     envp: Vec<CString>,
+    vdso_text_base: Vaddr,
 ) -> Result<ElfLoadInfo> {
     let root_vmar = process_vm.root_vmar();
 
@@ -102,7 +110,7 @@ fn init_and_map_vmos(
     };
 
     let map_addr = map_segment_vmos(elf, root_vmar, elf_file)?;
-    let mut aux_vec = init_aux_vec(elf, map_addr)?;
+    let mut aux_vec = init_aux_vec(elf, map_addr, vdso_text_base)?;
     let mut init_stack = InitStack::new_default_config(argv, envp);
     init_stack.init(root_vmar, elf, &ldso_load_info, &mut aux_vec)?;
     let entry_point = if let Some(ldso_load_info) = ldso_load_info {
diff --git a/services/libs/jinux-std/src/process/program_loader/mod.rs b/services/libs/jinux-std/src/process/program_loader/mod.rs
index 3c4deea08..c1456e1f5 100644
--- a/services/libs/jinux-std/src/process/program_loader/mod.rs
+++ b/services/libs/jinux-std/src/process/program_loader/mod.rs
@@ -4,12 +4,49 @@ mod shebang;
 use crate::fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD};
 use crate::fs::utils::Dentry;
 use crate::prelude::*;
+use crate::vdso::vdso_vmo;
+use crate::vm::perms::VmPerms;
 
 use self::elf::{load_elf_to_vm, ElfLoadInfo};
 use self::shebang::parse_shebang_line;
 
 use super::process_vm::ProcessVm;
 
+/// Map the vdso vmo to the corresponding virtual memory address.
+///
+/// All five pages of the vmo are first mapped without any permission. The first page, which
+/// holds the vdso data, is then made readable and writable, and the page at offset `0x4000`,
+/// which holds the vdso text, is made readable and executable.
+///
+/// Returns the address of the vdso text page.
+///
+/// # Panics
+///
+/// Panics if duplicating the vmo, building the mapping or changing a protection fails.
+pub fn map_vdso_to_vm(process_vm: &ProcessVm) -> Vaddr {
+    let root_vmar = process_vm.root_vmar();
+    let vdso_vmo = vdso_vmo();
+
+    let options = root_vmar
+        .new_map(vdso_vmo.dup().unwrap(), VmPerms::empty())
+        .unwrap()
+        .size(5 * PAGE_SIZE);
+    let vdso_data_base = options.build().unwrap();
+    let vdso_text_base = vdso_data_base + 0x4000;
+
+    // NOTE(review): the data page is mapped writable into user space, whereas Linux maps its
+    // vvar pages read-only; confirm that `WRITE` is really needed here.
+    let data_perms = VmPerms::READ | VmPerms::WRITE;
+    let text_perms = VmPerms::READ | VmPerms::EXEC;
+    root_vmar
+        .protect(data_perms, vdso_data_base..vdso_data_base + PAGE_SIZE)
+        .unwrap();
+    root_vmar
+        .protect(text_perms, vdso_text_base..vdso_text_base + PAGE_SIZE)
+        .unwrap();
+    vdso_text_base
+}
+
 /// Load an executable to root vmar, including loading programe image, preparing heap and stack,
 /// initializing argv, envp and aux tables.
 /// About recursion_limit: recursion limit is used to limit th recursion depth of shebang executables.
@@ -53,8 +90,18 @@ pub fn load_program_to_vm(
             recursion_limit - 1,
         );
     }
-    let elf_load_info =
-        load_elf_to_vm(process_vm, &*file_header, elf_file, fs_resolver, argv, envp)?;
+    process_vm.clear();
+    let vdso_text_base = map_vdso_to_vm(process_vm);
+    let elf_load_info = load_elf_to_vm(
+        process_vm,
+        &*file_header,
+        elf_file,
+        fs_resolver,
+        argv,
+        envp,
+        vdso_text_base,
+    )?;
+
     Ok((abs_path, elf_load_info))
 }
 
diff --git a/services/libs/jinux-std/src/vdso.rs b/services/libs/jinux-std/src/vdso.rs
new file mode 100644
index 000000000..d42e54551
--- /dev/null
+++ b/services/libs/jinux-std/src/vdso.rs
@@ -0,0 +1,271 @@
+//! The Virtual Dynamic Shared Object (VDSO) module enables user space applications to access kernel space routines
+//! without the need for context switching. This is particularly useful for frequently invoked operations such as
+//! obtaining the current time, which can be more efficiently handled within the user space.
+//!
+//! This module manages the VDSO mechanism through the `Vdso` struct, which contains a `VdsoData` instance with
+//! necessary time-related information, and a Virtual Memory Object (VMO) that encapsulates both the data and the
+//! VDSO routines. The VMO is intended to be mapped into the address space of every user space process for efficient access.
+//!
+//! The module is initialized with `init`, which sets up the `START_SEC_COUNT` and prepares the VDSO instance for
+//! use. It also hooks up the VDSO data update routine to the time management subsystem for periodic updates.
+
+use alloc::boxed::Box;
+use alloc::sync::Arc;
+use jinux_frame::{config::PAGE_SIZE, sync::Mutex, vm::VmIo};
+use jinux_rights::Rights;
+use jinux_time::Instant;
+use jinux_util::coeff::Coeff;
+use pod::Pod;
+use spin::Once;
+
+use crate::{
+    fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD},
+    time::{ClockID, SystemTime},
+    vm::vmo::{Vmo, VmoOptions},
+};
+
+const CLOCK_TAI: usize = 11;
+const VDSO_BASES: usize = CLOCK_TAI + 1;
+const DEFAULT_CLOCK_MODE: VdsoClockMode = VdsoClockMode::Tsc;
+
+/// Offset of the `VdsoData` image, and thus of its leading `seq` field, inside the vdso vmo.
+const VDSO_DATA_OFFSET: usize = 0x80;
+/// Offset of `VdsoData::last_cycles` inside the vdso vmo.
+const LAST_CYCLES_OFFSET: usize = VDSO_DATA_OFFSET + 0x8;
+/// Offset of `VdsoData::basetime[0]` inside the vdso vmo.
+const BASETIME_OFFSET: usize = VDSO_DATA_OFFSET + 0x20;
+/// Offset of the vdso library text inside the vdso vmo.
+const VDSO_TEXT_OFFSET: usize = 0x4000;
+
+static START_SEC_COUNT: Once<u64> = Once::new();
+static VDSO: Once<Arc<Vdso>> = Once::new();
+
+#[derive(Debug, Copy, Clone)]
+enum VdsoClockMode {
+    None = 0,
+    Tsc = 1,
+    Pvclock = 2,
+    Hvclock = 3,
+    Timens = i32::MAX as isize,
+}
+
+/// Instant used in `VdsoData`.
+///
+/// `sec` holds the whole seconds of the instant; `nanos_shifted` holds its sub-second
+/// nanoseconds left-shifted by the `shift` of the owning `VdsoData`, which is the form the
+/// Linux vdso routines expect for high-resolution clocks.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Pod)]
+struct VdsoInstant {
+    sec: u64,
+    nanos_shifted: u64,
+}
+
+impl VdsoInstant {
+    const fn zero() -> Self {
+        Self {
+            sec: 0,
+            nanos_shifted: 0,
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Pod)]
+struct ArchVdsoData {}
+
+/// A POD (Plain Old Data) structure maintaining timing information that is required for userspace.
+///
+/// Since currently we directly use the vdso shared library of Linux,
+/// it aligns with the Linux VDSO shared library format and contents
+/// (Linux v6.2.10)
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Pod)]
+struct VdsoData {
+    seq: u32,
+
+    clock_mode: i32,
+    last_cycles: u64,
+    mask: u64,
+    mult: u32,
+    shift: u32,
+    basetime: [VdsoInstant; VDSO_BASES],
+
+    tz_minuteswest: i32,
+    tz_dsttime: i32,
+    hrtimer_res: u32,
+    __unused: u32,
+
+    arch_data: ArchVdsoData,
+}
+
+impl VdsoData {
+    const fn empty() -> Self {
+        VdsoData {
+            seq: 0,
+            clock_mode: VdsoClockMode::None as i32,
+            last_cycles: 0,
+            mask: 0,
+            mult: 0,
+            shift: 0,
+            basetime: [VdsoInstant::zero(); VDSO_BASES],
+            tz_minuteswest: 0,
+            tz_dsttime: 0,
+            hrtimer_res: 0,
+            __unused: 0,
+            arch_data: ArchVdsoData {},
+        }
+    }
+
+    /// Init vdso data based on the default clocksource.
+    fn init(&mut self) {
+        let clocksource = jinux_time::default_clocksource();
+        let coeff = clocksource.coeff();
+        self.set_clock_mode(DEFAULT_CLOCK_MODE);
+        self.set_coeff(coeff);
+        self.update_instant(clocksource.last_instant(), clocksource.last_cycles());
+    }
+
+    fn set_clock_mode(&mut self, mode: VdsoClockMode) {
+        self.clock_mode = mode as i32;
+    }
+
+    fn set_coeff(&mut self, coeff: &Coeff) {
+        self.mult = coeff.mult();
+        self.shift = coeff.shift();
+    }
+
+    fn update_clock_instant(&mut self, clockid: usize, sec: u64, nanos_shifted: u64) {
+        self.basetime[clockid].sec = sec;
+        self.basetime[clockid].nanos_shifted = nanos_shifted;
+    }
+
+    /// Record `instant`, taken at clocksource cycle count `instant_cycles`, as the new base time.
+    fn update_instant(&mut self, instant: Instant, instant_cycles: u64) {
+        self.last_cycles = instant_cycles;
+        // The vdso routines compute `(basetime.nsec + cycle_delta * mult) >> shift`, so the
+        // base nanoseconds must be stored pre-shifted rather than multiplied by `mult`.
+        let nanos_shifted = (instant.nanos() as u64) << self.shift;
+        self.update_clock_instant(
+            ClockID::CLOCK_REALTIME as usize,
+            instant.secs() + START_SEC_COUNT.get().unwrap(),
+            nanos_shifted,
+        );
+        self.update_clock_instant(
+            ClockID::CLOCK_MONOTONIC as usize,
+            instant.secs(),
+            nanos_shifted,
+        );
+    }
+}
+
+/// Vdso (virtual dynamic shared object) is used to export some safe kernel space routines to user space applications
+/// so that applications can call these kernel space routines in-process, without context switching.
+///
+/// Vdso maintains a `VdsoData` instance that contains data information required for vdso mechanism,
+/// and a `Vmo` that contains all vdso-related information, including the vdso data and the vdso calling interfaces.
+/// This `Vmo` must be mapped to every userspace process.
+struct Vdso {
+    /// A VdsoData instance.
+    data: Mutex<VdsoData>,
+    /// the vmo of the entire vdso, including the library text and the vdso data.
+    vmo: Arc<Vmo>,
+}
+
+impl Vdso {
+    /// Construct a new Vdso, including an initialized `VdsoData` and a vmo of the vdso.
+    fn new() -> Self {
+        let mut vdso_data = VdsoData::empty();
+        vdso_data.init();
+
+        let vdso_vmo = {
+            let vmo_options = VmoOptions::<Rights>::new(5 * PAGE_SIZE);
+            let vdso_vmo = vmo_options.alloc().unwrap();
+            // Write vdso data to vdso vmo.
+            vdso_vmo.write_bytes(VDSO_DATA_OFFSET, vdso_data.as_bytes()).unwrap();
+
+            let vdso_lib_vmo = {
+                let vdso_path = FsPath::new(AT_FDCWD, "/lib/x86_64-linux-gnu/vdso64.so").unwrap();
+                let fs_resolver = FsResolver::new();
+                let vdso_lib = fs_resolver.lookup(&vdso_path).unwrap();
+                vdso_lib.inode().page_cache().unwrap()
+            };
+            let mut vdso_text = Box::new([0u8; PAGE_SIZE]);
+            vdso_lib_vmo.read_bytes(0, &mut *vdso_text).unwrap();
+            // Write vdso library to vdso vmo.
+            vdso_vmo.write_bytes(VDSO_TEXT_OFFSET, &*vdso_text).unwrap();
+
+            vdso_vmo
+        };
+        Self {
+            data: Mutex::new(vdso_data),
+            vmo: Arc::new(vdso_vmo),
+        }
+    }
+
+    /// Return the vdso vmo.
+    fn vmo(&self) -> Arc<Vmo> {
+        self.vmo.clone()
+    }
+
+    /// Publish a new base instant to the vdso data that is mapped into user space.
+    ///
+    /// The sequence counter is made odd before the data is touched and even again afterwards, and it
+    /// only moves forward, so a reader whose read overlaps an update observes a changed counter.
+    fn update_instant(&self, instant: Instant, instant_cycles: u64) {
+        // Hold the lock for the whole update so the kernel copy and the vmo cannot diverge.
+        let mut data = self.data.lock();
+        data.update_instant(instant, instant_cycles);
+
+        data.seq = data.seq.wrapping_add(1);
+        self.vmo.write_val(VDSO_DATA_OFFSET, &data.seq).unwrap();
+        // The base times are only valid relative to the cycle count they were sampled at.
+        self.vmo.write_val(LAST_CYCLES_OFFSET, &data.last_cycles).unwrap();
+        self.update_vmo_instant(&data, ClockID::CLOCK_REALTIME);
+        self.update_vmo_instant(&data, ClockID::CLOCK_MONOTONIC);
+        data.seq = data.seq.wrapping_add(1);
+        self.vmo.write_val(VDSO_DATA_OFFSET, &data.seq).unwrap();
+    }
+
+    /// Copy the base time of `clockid` from `data` into the vmo.
+    fn update_vmo_instant(&self, data: &VdsoData, clockid: ClockID) {
+        let clock_index = clockid as usize;
+        let secs_offset = BASETIME_OFFSET + clock_index * core::mem::size_of::<VdsoInstant>();
+        let nanos_offset = secs_offset + core::mem::size_of::<u64>();
+        self.vmo
+            .write_val(secs_offset, &data.basetime[clock_index].sec)
+            .unwrap();
+        self.vmo
+            .write_val(nanos_offset, &data.basetime[clock_index].nanos_shifted)
+            .unwrap();
+    }
+}
+
+/// Update the `VdsoInstant` in Vdso.
+fn update_vdso_instant(instant: Instant, instant_cycles: u64) {
+    VDSO.get().unwrap().update_instant(instant, instant_cycles);
+}
+
+/// Init `START_SEC_COUNT`, which is used to record the seconds passed since 1970-01-01 00:00:00.
+fn init_start_sec_count() {
+    let now = SystemTime::now();
+    let time_duration = now.duration_since(&SystemTime::UNIX_EPOCH).unwrap();
+    START_SEC_COUNT.call_once(|| time_duration.as_secs());
+}
+
+fn init_vdso() {
+    let vdso = Vdso::new();
+    VDSO.call_once(|| Arc::new(vdso));
+}
+
+/// Init vdso module.
+pub(super) fn init() {
+    init_start_sec_count();
+    init_vdso();
+    jinux_time::VDSO_DATA_UPDATE.call_once(|| Arc::new(update_vdso_instant));
+}
+
+/// Return the vdso vmo.
+pub(crate) fn vdso_vmo() -> Arc<Vmo> {
+    VDSO.get().unwrap().vmo()
+}