Support VDSO in Jinux

This commit is contained in:
Chen Chengjun 2023-12-06 15:03:52 +08:00 committed by Tate, Hongliang Tian
parent 715072b9f3
commit 2ad9735eab
8 changed files with 308 additions and 8 deletions

View File

@ -31,6 +31,12 @@ $(INITRAMFS)/lib/x86_64-linux-gnu:
@cp -L /lib/x86_64-linux-gnu/libm.so.6 $@
@cp -L /lib/x86_64-linux-gnu/libgcc_s.so.1 $@
@cp -L /lib/x86_64-linux-gnu/libpthread.so.0 $@
@# TODO: use a custom compiled vdso.so file in the future.
@git clone https://github.com/jinzhao-dev/linux_vdso.git
@cd ./linux_vdso && git checkout 2a6d2db 2>/dev/null
@cp -L ./linux_vdso/vdso64.so $@
@rm -rf ./linux_vdso
$(INITRAMFS)/lib64:
@mkdir -p $@

View File

@ -1,4 +1,4 @@
TESTS ?= open_test read_test statfs_test chmod_test pty_test uidgid_test
TESTS ?= open_test read_test statfs_test chmod_test pty_test uidgid_test vdso_clock_gettime_test
MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
CUR_DIR := $(patsubst %/,%,$(dir $(MKFILE_PATH)))

View File

@ -55,6 +55,7 @@ pub mod syscall;
pub mod thread;
pub mod time;
mod util;
pub(crate) mod vdso;
pub mod vm;
pub fn init() {
@ -63,6 +64,7 @@ pub fn init() {
sched::init();
fs::rootfs::init(boot::initramfs()).unwrap();
device::init().unwrap();
vdso::init();
}
fn init_thread() {

View File

@ -46,6 +46,7 @@ pub enum AuxKey {
/* 28...30 not used */
AT_EXECFN = 31, /* filename of program */
AT_SYSINFO = 32,
AT_SYSINFO_EHDR = 33, /* the start address of the page containing the VDSO */
}
impl AuxKey {

View File

@ -323,7 +323,7 @@ impl InitStack {
}
}
pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr) -> Result<AuxVec> {
pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr, vdso_text_base: Vaddr) -> Result<AuxVec> {
let mut aux_vec = AuxVec::new();
aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _)?;
let ph_addr = if elf.is_shared_object() {
@ -341,6 +341,7 @@ pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr) -> Result<AuxVec> {
elf.entry_point()
};
aux_vec.set(AuxKey::AT_ENTRY, elf_entry as u64)?;
aux_vec.set(AuxKey::AT_SYSINFO_EHDR, vdso_text_base as u64)?;
Ok(aux_vec)
}

View File

@ -31,6 +31,7 @@ pub fn load_elf_to_vm(
fs_resolver: &FsResolver,
argv: Vec<CString>,
envp: Vec<CString>,
vdso_text_base: Vaddr,
) -> Result<ElfLoadInfo> {
let elf = Elf::parse_elf(file_header)?;
@ -40,9 +41,15 @@ pub fn load_elf_to_vm(
None
};
process_vm.clear();
match init_and_map_vmos(process_vm, ldso, &elf, &elf_file, argv, envp) {
match init_and_map_vmos(
process_vm,
ldso,
&elf,
&elf_file,
argv,
envp,
vdso_text_base,
) {
Ok(elf_load_info) => Ok(elf_load_info),
Err(e) => {
// Since the process_vm is cleared, the process cannot return to user space again,
@ -91,6 +98,7 @@ fn init_and_map_vmos(
elf_file: &Dentry,
argv: Vec<CString>,
envp: Vec<CString>,
vdso_text_base: Vaddr,
) -> Result<ElfLoadInfo> {
let root_vmar = process_vm.root_vmar();
@ -102,7 +110,7 @@ fn init_and_map_vmos(
};
let map_addr = map_segment_vmos(elf, root_vmar, elf_file)?;
let mut aux_vec = init_aux_vec(elf, map_addr)?;
let mut aux_vec = init_aux_vec(elf, map_addr, vdso_text_base)?;
let mut init_stack = InitStack::new_default_config(argv, envp);
init_stack.init(root_vmar, elf, &ldso_load_info, &mut aux_vec)?;
let entry_point = if let Some(ldso_load_info) = ldso_load_info {

View File

@ -4,12 +4,37 @@ mod shebang;
use crate::fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD};
use crate::fs::utils::Dentry;
use crate::prelude::*;
use crate::vdso::vdso_vmo;
use crate::vm::perms::VmPerms;
use self::elf::{load_elf_to_vm, ElfLoadInfo};
use self::shebang::parse_shebang_line;
use super::process_vm::ProcessVm;
/// Map the shared vdso vmo into the root vmar of `process_vm`.
///
/// The whole five-page region is first mapped without any permission. Afterwards the first
/// page (vdso data) is made readable and writable, and the page at offset `0x4000`
/// (vdso text) is made readable and executable.
///
/// Returns the base address of the vdso text page.
///
/// NOTE(review): the data page is user-writable and the vmo handle is obtained through `dup()`;
/// if the mapping is shared rather than copy-on-write, a process could alter the vdso data
/// seen by others — confirm whether `VmPerms::READ` alone would be sufficient.
pub fn map_vdso_to_vm(process_vm: &ProcessVm) -> Vaddr {
    let root_vmar = process_vm.root_vmar();

    // Reserve the full region with empty permissions; the pages in use are opened up below.
    let vdso_data_base = root_vmar
        .new_map(vdso_vmo().dup().unwrap(), VmPerms::empty())
        .unwrap()
        .size(5 * PAGE_SIZE)
        .build()
        .unwrap();
    let vdso_text_base = vdso_data_base + 0x4000;

    let page_perms = [
        (VmPerms::READ | VmPerms::WRITE, vdso_data_base),
        (VmPerms::READ | VmPerms::EXEC, vdso_text_base),
    ];
    for (perms, page_base) in page_perms {
        root_vmar
            .protect(perms, page_base..page_base + PAGE_SIZE)
            .unwrap();
    }

    vdso_text_base
}
/// Load an executable to root vmar, including loading the program image, preparing heap and stack,
/// initializing argv, envp and aux tables.
/// About recursion_limit: recursion limit is used to limit the recursion depth of shebang executables.
@ -53,8 +78,18 @@ pub fn load_program_to_vm(
recursion_limit - 1,
);
}
let elf_load_info =
load_elf_to_vm(process_vm, &*file_header, elf_file, fs_resolver, argv, envp)?;
process_vm.clear();
let vdso_text_base = map_vdso_to_vm(process_vm);
let elf_load_info = load_elf_to_vm(
process_vm,
&*file_header,
elf_file,
fs_resolver,
argv,
envp,
vdso_text_base,
)?;
Ok((abs_path, elf_load_info))
}

View File

@ -0,0 +1,247 @@
//! The Virtual Dynamic Shared Object (VDSO) module enables user space applications to access kernel space routines
//! without the need for context switching. This is particularly useful for frequently invoked operations such as
//! obtaining the current time, which can be more efficiently handled within the user space.
//!
//! This module manages the VDSO mechanism through the `Vdso` struct, which contains a `VdsoData` instance with
//! necessary time-related information, and a Virtual Memory Object (VMO) that encapsulates both the data and the
//! VDSO routines. The VMO is intended to be mapped into the address space of every user space process for efficient access.
//!
//! The module is initialized with `init`, which sets up the `START_SEC_COUNT` and prepares the VDSO instance for
//! use. It also hooks up the VDSO data update routine to the time management subsystem for periodic updates.
use alloc::boxed::Box;
use alloc::sync::Arc;
use jinux_frame::{config::PAGE_SIZE, sync::Mutex, vm::VmIo};
use jinux_rights::Rights;
use jinux_time::Instant;
use jinux_util::coeff::Coeff;
use pod::Pod;
use spin::Once;
use crate::{
fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD},
time::{ClockID, SystemTime},
vm::vmo::{Vmo, VmoOptions},
};
/// Clock id of `CLOCK_TAI`; it is the largest clock id that has a slot in `VdsoData::basetime`.
const CLOCK_TAI: usize = 11;
/// Number of per-clock entries in `VdsoData::basetime`.
const VDSO_BASES: usize = CLOCK_TAI + 1;
/// Clock mode that `VdsoData::init` stores into the vdso data.
const DEFAULT_CLOCK_MODE: VdsoClockMode = VdsoClockMode::Tsc;
/// Seconds elapsed between the Unix epoch and the moment `init_start_sec_count` ran.
static START_SEC_COUNT: Once<u64> = Once::new();
/// The global `Vdso` instance, set once by `init_vdso`.
static VDSO: Once<Arc<Vdso>> = Once::new();
/// Clock modes that can be stored (as `i32`) in `VdsoData::clock_mode`.
///
/// Within this module only `None` (the value of an empty `VdsoData`) and `Tsc`
/// (the `DEFAULT_CLOCK_MODE`) are used.
///
/// NOTE(review): the discriminants presumably mirror the clock-mode values expected by the
/// Linux vdso library — confirm before changing any of them.
#[derive(Debug, Copy, Clone)]
enum VdsoClockMode {
None = 0,
Tsc = 1,
Pvclock = 2,
Hvclock = 3,
Timens = i32::MAX as isize,
}
/// Instant used in `VdsoData`.
///
/// A `VdsoInstant` records the seconds of an instant, and the result of multiplying
/// the instant's `nanos` with the `mult` of the corresponding `VdsoData`.
///
/// The layout is `#[repr(C)]`: two consecutive `u64` fields, 16 bytes in total.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Pod)]
struct VdsoInstant {
// Whole seconds of the instant.
sec: u64,
// Nanoseconds of the instant, already multiplied by `VdsoData::mult`.
nanos_mult: u64,
}
impl VdsoInstant {
const fn zero() -> Self {
Self {
sec: 0,
nanos_mult: 0,
}
}
}
/// Architecture-specific part of `VdsoData`.
///
/// It currently has no fields and only serves as the type of `VdsoData::arch_data`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Pod)]
struct ArchVdsoData {}
/// A POD (Plain Old Data) structure maintaining the timing information that is required by userspace.
///
/// Since we currently use the vdso shared library of Linux directly,
/// this structure aligns with the format and contents expected by that library
/// (Linux v6.2.10).
///
/// The structure is `#[repr(C)]`, so the order and width of the fields define the byte layout
/// that is copied into the vdso vmo; do not reorder or resize them.
#[repr(C)]
#[derive(Debug, Copy, Clone, Pod)]
struct VdsoData {
// Update-in-progress marker; `Vdso::update_instant` writes 1 and then 0 to its location in the vmo.
seq: u32,
// A `VdsoClockMode` value stored as `i32`.
clock_mode: i32,
// Cycle count that corresponds to the instants stored in `basetime`.
last_cycles: u64,
// Never modified after `empty()` within this module.
mask: u64,
// `mult` and `shift` are copied from the `Coeff` of the default clocksource.
mult: u32,
shift: u32,
// One `VdsoInstant` per clock id; only CLOCK_REALTIME and CLOCK_MONOTONIC are updated here.
basetime: [VdsoInstant; VDSO_BASES],
// The remaining fields keep their zero values from `empty()` within this module.
tz_minuteswest: i32,
tz_dsttime: i32,
hrtimer_res: u32,
__unused: u32,
arch_data: ArchVdsoData,
}
impl VdsoData {
    /// Build a `VdsoData` with every numeric field zeroed and the clock mode set to
    /// `VdsoClockMode::None`.
    const fn empty() -> Self {
        Self {
            seq: 0,
            clock_mode: VdsoClockMode::None as i32,
            last_cycles: 0,
            mask: 0,
            mult: 0,
            shift: 0,
            basetime: [VdsoInstant::zero(); VDSO_BASES],
            tz_minuteswest: 0,
            tz_dsttime: 0,
            hrtimer_res: 0,
            __unused: 0,
            arch_data: ArchVdsoData {},
        }
    }

    /// Initialize the vdso data from the default clocksource: clock mode, coefficients,
    /// and the clocksource's last recorded instant together with its cycle count.
    fn init(&mut self) {
        let source = jinux_time::default_clocksource();
        self.set_clock_mode(DEFAULT_CLOCK_MODE);
        // The coefficients must be in place before `update_instant`, which multiplies by `mult`.
        self.set_coeff(source.coeff());
        let (instant, cycles) = (source.last_instant(), source.last_cycles());
        self.update_instant(instant, cycles);
    }

    /// Store `mode` as the raw `i32` clock mode.
    fn set_clock_mode(&mut self, mode: VdsoClockMode) {
        let raw_mode = mode as i32;
        self.clock_mode = raw_mode;
    }

    /// Copy `mult` and `shift` from `coeff`.
    fn set_coeff(&mut self, coeff: &Coeff) {
        let (mult, shift) = (coeff.mult(), coeff.shift());
        self.mult = mult;
        self.shift = shift;
    }

    /// Overwrite the basetime entry of `clockid` with the given values.
    ///
    /// Panics if `clockid` is not a valid index into `basetime`.
    fn update_clock_instant(&mut self, clockid: usize, sec: u64, nanos_mult: u64) {
        let entry = &mut self.basetime[clockid];
        entry.sec = sec;
        entry.nanos_mult = nanos_mult;
    }

    /// Record `instant` (taken at `instant_cycles`) as the new base for CLOCK_REALTIME
    /// and CLOCK_MONOTONIC.
    ///
    /// The realtime entry is offset by `START_SEC_COUNT`; this panics if
    /// `START_SEC_COUNT` has not been initialized yet.
    fn update_instant(&mut self, instant: Instant, instant_cycles: u64) {
        self.last_cycles = instant_cycles;

        let monotonic_secs = instant.secs();
        let realtime_secs = monotonic_secs + START_SEC_COUNT.get().unwrap();
        // Both clocks share the same sub-second part.
        let nanos_mult = instant.nanos() as u64 * self.mult as u64;

        self.update_clock_instant(ClockID::CLOCK_REALTIME as usize, realtime_secs, nanos_mult);
        self.update_clock_instant(
            ClockID::CLOCK_MONOTONIC as usize,
            monotonic_secs,
            nanos_mult,
        );
    }
}
/// Vdso (virtual dynamic shared object) is used to export some safe kernel space routines to user space applications
/// so that applications can call these kernel space routines in-process, without context switching.
///
/// Vdso maintains a `VdsoData` instance that contains the data required by the vdso mechanism,
/// and a `Vmo` that contains all vdso-related information, including the vdso data and the vdso calling interfaces.
/// This `Vmo` must be mapped to every userspace process.
struct Vdso {
/// The kernel-side copy of the vdso data; it is updated first and then mirrored into `vmo`.
data: Mutex<VdsoData>,
/// The vmo of the entire vdso, including the library text and the vdso data.
vmo: Arc<Vmo>,
}
impl Vdso {
/// Construct a new Vdso, including an initialized `VdsoData` and a vmo of the vdso.
fn new() -> Self {
let mut vdso_data = VdsoData::empty();
vdso_data.init();
let vdso_vmo = {
let vmo_options = VmoOptions::<Rights>::new(5 * PAGE_SIZE);
let vdso_vmo = vmo_options.alloc().unwrap();
// Write vdso data to vdso vmo.
vdso_vmo.write_bytes(0x80, vdso_data.as_bytes()).unwrap();
let vdso_lib_vmo = {
let vdso_path = FsPath::new(AT_FDCWD, "/lib/x86_64-linux-gnu/vdso64.so").unwrap();
let fs_resolver = FsResolver::new();
let vdso_lib = fs_resolver.lookup(&vdso_path).unwrap();
vdso_lib.inode().page_cache().unwrap()
};
let mut vdso_text = Box::new([0u8; PAGE_SIZE]);
vdso_lib_vmo.read_bytes(0, &mut *vdso_text).unwrap();
// Write vdso library to vdso vmo.
vdso_vmo.write_bytes(0x4000, &*vdso_text).unwrap();
vdso_vmo
};
Self {
data: Mutex::new(vdso_data),
vmo: Arc::new(vdso_vmo),
}
}
/// Return the vdso vmo.
fn vmo(&self) -> Arc<Vmo> {
self.vmo.clone()
}
fn update_instant(&self, instant: Instant, instant_cycles: u64) {
self.data.lock().update_instant(instant, instant_cycles);
self.vmo.write_val(0x80, &1).unwrap();
self.update_vmo_instant(ClockID::CLOCK_REALTIME);
self.update_vmo_instant(ClockID::CLOCK_MONOTONIC);
self.vmo.write_val(0x80, &0).unwrap();
}
/// Update the requisite fields of the vdso data in the vmo.
fn update_vmo_instant(&self, clockid: ClockID) {
let clock_index = clockid as usize;
let secs_offset = 0xA0 + clock_index * 0x10;
let nanos_mult_offset = 0xA8 + clock_index * 0x10;
let data = self.data.lock();
self.vmo
.write_val(secs_offset, &data.basetime[clock_index].sec)
.unwrap();
self.vmo
.write_val(nanos_mult_offset, &data.basetime[clock_index].nanos_mult)
.unwrap();
}
}
/// Forward a new instant and its cycle count to the global `Vdso`.
///
/// Panics if the global `VDSO` has not been initialized.
fn update_vdso_instant(instant: Instant, instant_cycles: u64) {
    let vdso = VDSO.get().unwrap();
    vdso.update_instant(instant, instant_cycles);
}
/// Initialize `START_SEC_COUNT` with the number of whole seconds between
/// `SystemTime::UNIX_EPOCH` (1970-01-01 00:00:00) and the current system time.
///
/// Panics if the duration since the epoch cannot be computed.
fn init_start_sec_count() {
    let secs_since_epoch = SystemTime::now()
        .duration_since(&SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_secs();
    START_SEC_COUNT.call_once(|| secs_since_epoch);
}
/// Build the `Vdso` instance and store it in the global `VDSO`.
///
/// The instance is constructed before `call_once` is invoked, i.e. regardless of whether
/// `VDSO` has already been set.
fn init_vdso() {
    let instance = Arc::new(Vdso::new());
    VDSO.call_once(move || instance);
}
/// Initialize the vdso module.
///
/// The start second count is set up first because building the `Vdso` reads it; afterwards
/// `update_vdso_instant` is installed as `jinux_time::VDSO_DATA_UPDATE`.
pub(super) fn init() {
    init_start_sec_count();
    init_vdso();

    let install_hook = || Arc::new(update_vdso_instant);
    jinux_time::VDSO_DATA_UPDATE.call_once(install_hook);
}
/// Return the vdso vmo.
///
/// The returned `Arc` refers to the single vmo owned by the global `Vdso`.
///
/// Panics if the vdso module has not been initialized via `init`.
pub(crate) fn vdso_vmo() -> Arc<Vmo> {
    // `Vdso::vmo` already hands out a fresh `Arc`, so no further clone is needed.
    VDSO.get().unwrap().vmo()
}