From 6af524b4518694d6d8f6934b70daea5788a3a14f Mon Sep 17 00:00:00 2001
From: Zejun Zhao
Date: Tue, 28 Oct 2025 17:05:01 +0800
Subject: [PATCH] Support RISC-V Sv39 paging mode

Sv48 remains the default address-translation mode. The new
`riscv_sv39_mode` feature switches OSTD to 3-level Sv39 paging, and
boot.S now sets up boot page tables for both modes, falling back to
Sv39 if the write of the Sv48 satp value does not take effect.

---
 kernel/Cargo.toml               |  2 ++
 ostd/Cargo.toml                 |  1 +
 ostd/src/arch/riscv/boot/boot.S | 54 +++++++++++++++++++++++++++------
 ostd/src/arch/riscv/mm/mod.rs   | 19 +++++++++++-
 ostd/src/lib.rs                 |  7 +++++
 ostd/src/mm/kspace/mod.rs       | 52 ++++++++++++++++++++-----------
 ostd/src/mm/mod.rs              | 21 ++-----------
 7 files changed, 109 insertions(+), 47 deletions(-)

diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index 2356874a9..1605725ae 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -77,6 +77,8 @@ loongArch64 = "0.2.5"
 all = ["cvm_guest"]
 cvm_guest = ["dep:tdx-guest", "ostd/cvm_guest", "aster-virtio/cvm_guest"]
 coverage = ["ostd/coverage"]
+# By default, the Sv48 address translation mode is used.
+riscv_sv39_mode = ["ostd/riscv_sv39_mode"]

 [lints]
 workspace = true

diff --git a/ostd/Cargo.toml b/ostd/Cargo.toml
index c5636cc93..07bfdb27d 100644
--- a/ostd/Cargo.toml
+++ b/ostd/Cargo.toml
@@ -75,6 +75,7 @@ default = ["cvm_guest"]
 # The guest OS support for Confidential VMs (CVMs), e.g., Intel TDX
 cvm_guest = ["dep:tdx-guest", "dep:iced-x86"]
 coverage = ["minicov"]
+riscv_sv39_mode = []

 [lints]
 workspace = true

diff --git a/ostd/src/arch/riscv/boot/boot.S b/ostd/src/arch/riscv/boot/boot.S
index c815fa10e..5f78b0f1f 100644
--- a/ostd/src/arch/riscv/boot/boot.S
+++ b/ostd/src/arch/riscv/boot/boot.S
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: MPL-2.0 */

+SATP_MODE_SV39 = 8 << 60
 SATP_MODE_SV48 = 9 << 60
 SATP_PPN_SHIFT = 0

@@ -22,22 +23,36 @@ _start:
     # a0 = hart id
     # a1 = device tree paddr (not touched)

-    # Set up the page table.
-    # boot_l4pt[511] = (PPN(boot_l3pt) << PTE_PPN_SHIFT) | PTE_V
-    lla t1, boot_l4pt
+    # Set up the Sv48 page table.
+    # sv48_boot_l4pt[511] = (PPN(sv48_boot_l3pt) << PTE_PPN_SHIFT) | PTE_V
+    lla t1, sv48_boot_l4pt
     li t0, 511 * PTE_SIZE
     add t1, t1, t0
-    lla t0, boot_l3pt
+    lla t0, sv48_boot_l3pt
     srli t0, t0, PAGE_SHIFT - PTE_PPN_SHIFT
     ori t0, t0, PTE_V
     sd t0, 0(t1)

-    # Load the page table.
-    lla t0, boot_l4pt
+    # Try loading the Sv48 page table.
+    lla t0, sv48_boot_l4pt
     li t1, SATP_MODE_SV48
     srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT
     or t0, t0, t1
     csrw satp, t0
+
+    # Check whether the write to satp took effect. Per the RISC-V privileged
+    # spec, writing an unsupported MODE to satp leaves the register
+    # unchanged, so read it back; if the value did not stick, fall back
+    # to Sv39.
+    # Reference: .
+    csrr t1, satp
+    beq t0, t1, flush_tlb
+
+    # Load the Sv39 page table instead.
+    lla t0, sv39_boot_l3pt
+    li t1, SATP_MODE_SV39
+    srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT
+    or t0, t0, t1
+    csrw satp, t0
+
+flush_tlb:
     sfence.vma

     # Update SP/PC to use the virtual address.
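The probe-and-fallback sequence above is the heart of the change: write the Sv48 `satp` value, read it back, and fall back to Sv39 if the write did not take effect. In Rust, the same logic looks roughly like the sketch below. This is illustrative only, not code from the patch; the helper name and the two-PPN signature are hypothetical, and it relies on the `riscv` crate that `ostd` already depends on.

```rust
use riscv::register::satp::{self, Mode};

/// Hypothetical sketch of boot.S's probe-and-fallback logic.
///
/// # Safety
/// Both root tables must map the currently executing code, as in boot.S.
unsafe fn activate_boot_page_table(sv48_root_ppn: usize, sv39_root_ppn: usize) {
    // Per the privileged spec, writing an unsupported MODE to satp has
    // no effect, so the register keeps its old value.
    unsafe { satp::set(Mode::Sv48, 0, sv48_root_ppn) };
    if satp::read().mode() != Mode::Sv48 {
        // Sv48 is unsupported on this hart; load the Sv39 root table.
        unsafe { satp::set(Mode::Sv39, 0, sv39_root_ppn) };
    }
    // Flush stale address-translation caches (`sfence.vma` in boot.S).
    unsafe { core::arch::asm!("sfence.vma") };
}
```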
@@ -51,19 +66,38 @@ _start:

 PTE_VRWX = PTE_V | PTE_R | PTE_W | PTE_X

 .balign 4096
-boot_l4pt:
-    .quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # identity 0~512 GiB
+sv48_boot_l4pt:
+    .quad (0x0 << PTE_PPN_SHIFT) | PTE_VRWX # identity 0~512 GiB
     .zero 255 * PTE_SIZE
-    .quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # linear 0~512 GiB
+    .quad (0x0 << PTE_PPN_SHIFT) | PTE_VRWX # linear 0~512 GiB
     .zero 254 * PTE_SIZE
-    .quad 0 # TBA (-> boot_l3pt)
+    .quad 0 # TBA (-> sv48_boot_l3pt)

-boot_l3pt: # 0xffff_ffff_0000_0000 -> 0x0000_0000_0000_0000
+sv48_boot_l3pt: # 0xffff_ffff_0000_0000 -> 0x0000_0000_0000_0000
     .zero 508 * PTE_SIZE
     .quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # code 0~1 GiB
     .quad (0x40000 << PTE_PPN_SHIFT) | PTE_VRWX # code 1~2 GiB
     .quad (0x80000 << PTE_PPN_SHIFT) | PTE_VRWX # code 2~3 GiB
     .quad 0

+.balign 4096
+sv39_boot_l3pt: # Sv39 root table: 512 entries, 1 GiB each
+    .set i, 0
+    .rept 128 # identity 0~128 GiB (entries 0~127)
+    .quad ((i * 0x40000) << PTE_PPN_SHIFT) | PTE_VRWX
+    .set i, i + 1
+    .endr
+    .zero 128 * PTE_SIZE
+    .set i, 0
+    .rept 128 # linear 0~128 GiB (entries 256~383)
+    .quad ((i * 0x40000) << PTE_PPN_SHIFT) | PTE_VRWX
+    .set i, i + 1
+    .endr
+    .zero 124 * PTE_SIZE
+    .quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # code 0~1 GiB (entry 508)
+    .quad (0x40000 << PTE_PPN_SHIFT) | PTE_VRWX # code 1~2 GiB
+    .quad (0x80000 << PTE_PPN_SHIFT) | PTE_VRWX # code 2~3 GiB
+    .quad 0
+
 .section ".boot.stack", "aw", @nobits
 boot_stack_bottom:

diff --git a/ostd/src/arch/riscv/mm/mod.rs b/ostd/src/arch/riscv/mm/mod.rs
index 2d77b73fc..2e807baa1 100644
--- a/ostd/src/arch/riscv/mm/mod.rs
+++ b/ostd/src/arch/riscv/mm/mod.rs
@@ -26,6 +26,7 @@ mod util;
 #[derive(Clone, Debug, Default)]
 pub(crate) struct PagingConsts {}

+#[cfg(not(feature = "riscv_sv39_mode"))]
 impl PagingConstsTrait for PagingConsts {
     const BASE_PAGE_SIZE: usize = 4096;
     const NR_LEVELS: PagingLevel = 4;
@@ -35,6 +36,16 @@ impl PagingConstsTrait for PagingConsts {
     const PTE_SIZE: usize = size_of::<PageTableEntry>();
 }

+#[cfg(feature = "riscv_sv39_mode")]
+impl PagingConstsTrait for PagingConsts {
+    const BASE_PAGE_SIZE: usize = 4096;
+    const NR_LEVELS: PagingLevel = 3;
+    const ADDRESS_WIDTH: usize = 39;
+    const VA_SIGN_EXT: bool = true;
+    const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 2;
+    const PTE_SIZE: usize = size_of::<PageTableEntry>();
+}
+
 bitflags::bitflags! {
     #[derive(Pod)]
     #[repr(C)]
@@ -153,8 +164,14 @@ pub(crate) struct PageTableEntry(usize);
 pub(crate) unsafe fn activate_page_table(root_paddr: Paddr, _root_pt_cache: CachePolicy) {
     assert!(root_paddr % PagingConsts::BASE_PAGE_SIZE == 0);
     let ppn = root_paddr >> 12;
+
+    #[cfg(not(feature = "riscv_sv39_mode"))]
+    let mode = riscv::register::satp::Mode::Sv48;
+    #[cfg(feature = "riscv_sv39_mode")]
+    let mode = riscv::register::satp::Mode::Sv39;
+
     unsafe {
-        riscv::register::satp::set(riscv::register::satp::Mode::Sv48, 0, ppn);
+        riscv::register::satp::set(mode, 0, ppn);
     }
 }

diff --git a/ostd/src/lib.rs b/ostd/src/lib.rs
index 409d9baac..7231e94e0 100644
--- a/ostd/src/lib.rs
+++ b/ostd/src/lib.rs
@@ -161,6 +161,13 @@ fn invoke_ffi_init_funcs() {
     }
 }

+mod feature_validation {
+    #[cfg(all(not(target_arch = "riscv64"), feature = "riscv_sv39_mode"))]
+    compile_error!(
+        "feature \"riscv_sv39_mode\" cannot be enabled for architectures other than RISC-V"
+    );
+}
+
 /// Simple unit tests for the ktest framework.
 #[cfg(ktest)]
 mod test {
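As a sanity check on the `sv39_boot_l3pt` layout in the boot.S hunk above: in Sv39, VA bits 38:30 index the 512-entry root table, and each root entry maps a 1 GiB huge page (0x40000 base pages, hence the `i * 0x40000` PPNs). So the linear-mapping base lands at entry 256 and the kernel-code base at entry 508. A hypothetical compile-time check, not part of the patch:

```rust
// VA bits 38:30 select one of the 512 root-table entries in Sv39.
const fn sv39_root_index(va: usize) -> usize {
    (va >> 30) & 0x1ff
}

// Entries 0~127: identity map; 256~383: linear map; 508~510: kernel code.
const _: () = {
    assert!(sv39_root_index(0x0000_0000_0000_0000) == 0);
    assert!(sv39_root_index(0xffff_ffc0_0000_0000) == 256); // linear mapping base
    assert!(sv39_root_index(0xffff_ffff_0000_0000) == 508); // kernel code base
};
```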
diff --git a/ostd/src/mm/kspace/mod.rs b/ostd/src/mm/kspace/mod.rs
index ed9578cd9..c4601ec93 100644
--- a/ostd/src/mm/kspace/mod.rs
+++ b/ostd/src/mm/kspace/mod.rs
@@ -57,23 +57,41 @@ use super::{
 use crate::{
     arch::mm::{PageTableEntry, PagingConsts},
     boot::memory_region::MemoryRegionType,
-    mm::{page_table::largest_pages, PagingLevel},
+    const_assert,
+    mm::{page_table::largest_pages, PagingLevel, PAGE_SIZE},
     task::disable_preempt,
 };

-/// The shortest supported address width is 39 bits. And the literal
-/// values are written for 48 bits address width. Adjust the values
-/// by arithmetic left shift.
-const ADDR_WIDTH_SHIFT: isize = PagingConsts::ADDRESS_WIDTH as isize - 48;
+// The shortest supported address width is 39 bits, so the literal
+// values below are written for a 39-bit address width and adjusted
+// by an arithmetic left shift.
+const_assert!(PagingConsts::ADDRESS_WIDTH >= 39);
+const ADDR_WIDTH_SHIFT: usize = PagingConsts::ADDRESS_WIDTH - 39;

 /// Start of the kernel address space.
-/// This is the _lowest_ address of the x86-64's _high_ canonical addresses.
 #[cfg(not(target_arch = "loongarch64"))]
-pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
 #[cfg(target_arch = "loongarch64")]
-pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;

 /// End of the kernel address space (non inclusive).
-pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000;
+
+/// The maximum virtual address of user space (non-inclusive).
+///
+/// A typical way to reserve half of the address space for the kernel is
+/// to let user space occupy the lower half of the `ADDRESS_WIDTH`-bit
+/// virtual address space.
+///
+/// Also, the top page is not regarded as usable since it's a workaround
+/// for some x86_64 CPUs' bugs. See
+///
+/// for the rationale.
+pub const MAX_USERSPACE_VADDR: Vaddr = (0x0000_0040_0000_0000 << ADDR_WIDTH_SHIFT) - PAGE_SIZE;
+
+/// The kernel address space.
+///
+/// These are the high canonical addresses (i.e., the negative part of the
+/// address space, with the most significant bits of the addresses set).
+pub const KERNEL_VADDR_RANGE: Range<Vaddr> = KERNEL_BASE_VADDR..KERNEL_END_VADDR;

 /// The kernel code is linear mapped to this address.
 ///
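To see how the rebased literals scale, note that under Sv48 (`ADDRESS_WIDTH = 48`) we get `ADDR_WIDTH_SHIFT = 9`, which reproduces the pre-patch 48-bit constants, while under Sv39 the shift is 0 and the literals are used as-is. An illustrative compile-time check, assuming a 64-bit target and not part of the patch:

```rust
// With ADDR_WIDTH_SHIFT = 9 (Sv48), the 39-bit literals become the
// familiar 48-bit constants; with ADDR_WIDTH_SHIFT = 0 (Sv39), they
// are used unchanged.
const _: () = {
    // KERNEL_BASE_VADDR: Sv39 literal -> Sv48 value.
    assert!(0xffff_ffc0_0000_0000usize << 9 == 0xffff_8000_0000_0000);
    // User-space ceiling (before subtracting PAGE_SIZE).
    assert!(0x0000_0040_0000_0000usize << 9 == 0x0000_8000_0000_0000);
};
```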
@@ -85,26 +103,26 @@ pub fn kernel_loaded_offset() -> usize {
 }

 #[cfg(target_arch = "x86_64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000;
 #[cfg(target_arch = "riscv64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000;
 #[cfg(target_arch = "loongarch64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000;

-const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_e100_0000_0000 << ADDR_WIDTH_SHIFT;
-const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_e000_0000_0000 << ADDR_WIDTH_SHIFT;
+const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;
+const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;
 pub(in crate::mm) const FRAME_METADATA_RANGE: Range<Vaddr> =
     FRAME_METADATA_BASE_VADDR..FRAME_METADATA_CAP_VADDR;

-const VMALLOC_BASE_VADDR: Vaddr = 0xffff_c000_0000_0000 << ADDR_WIDTH_SHIFT;
+const VMALLOC_BASE_VADDR: Vaddr = 0xffff_ffe0_0000_0000 << ADDR_WIDTH_SHIFT;
 pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;

 /// The base address of the linear mapping of all physical
 /// memory in the kernel address space.
 #[cfg(not(target_arch = "loongarch64"))]
-pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
 #[cfg(target_arch = "loongarch64")]
-pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
 pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;

 /// Convert physical address to virtual address using offset, only available inside `ostd`

diff --git a/ostd/src/mm/mod.rs b/ostd/src/mm/mod.rs
index 964958d14..11d79fcdb 100644
--- a/ostd/src/mm/mod.rs
+++ b/ostd/src/mm/mod.rs
@@ -22,7 +22,7 @@ pub mod vm_space;
 #[cfg(ktest)]
 mod test;

-use core::{fmt::Debug, ops::Range};
+use core::fmt::Debug;

 pub use self::{
     dma::{DmaCoherent, DmaDirection, DmaStream},
@@ -37,6 +37,7 @@ pub use self::{
         Fallible, FallibleVmRead, FallibleVmWrite, Infallible, PodAtomic, PodOnce, VmIo,
         VmIoFill, VmIoOnce, VmReader, VmWriter,
     },
+    kspace::{KERNEL_VADDR_RANGE, MAX_USERSPACE_VADDR},
     mem_obj::{HasDaddr, HasPaddr, HasPaddrRange, HasSize},
     page_prop::{CachePolicy, PageFlags, PageProperty},
     vm_space::VmSpace,
@@ -116,24 +117,6 @@ pub(crate) const fn nr_base_per_page<C: PagingConstsTrait>(level: PagingLevel) -> usize {
     page_size::<C>(level) / C::BASE_PAGE_SIZE
 }

-/// The maximum virtual address of user space (non inclusive).
-///
-/// Typical 64-bit systems have at least 48-bit virtual address space.
-/// A typical way to reserve half of the address space for the kernel is
-/// to use the highest 48-bit virtual address space.
-///
-/// Also, the top page is not regarded as usable since it's a workaround
-/// for some x86_64 CPUs' bugs. See
-///
-/// for the rationale.
-pub const MAX_USERSPACE_VADDR: Vaddr = 0x0000_8000_0000_0000 - PAGE_SIZE;
-
-/// The kernel address space.
-///
-/// There are the high canonical addresses defined in most 48-bit width
-/// architectures.
-pub const KERNEL_VADDR_RANGE: Range<Vaddr> = 0xffff_8000_0000_0000..0xffff_ffff_ffff_0000;
-
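Because `MAX_USERSPACE_VADDR` and `KERNEL_VADDR_RANGE` now live in `kspace` but are re-exported from `mm` (see the `pub use` hunk above), downstream paths like `ostd::mm::MAX_USERSPACE_VADDR` keep compiling unchanged. A hypothetical consumer, illustrating only that the constants adapt to the active paging mode (not code from the patch):

```rust
use ostd::mm::{Vaddr, KERNEL_VADDR_RANGE, MAX_USERSPACE_VADDR};

/// Hypothetical classification helper (not part of `ostd`).
fn classify(va: Vaddr) -> &'static str {
    if va < MAX_USERSPACE_VADDR {
        // Ceiling is 0x0000_8000_0000_0000 - PAGE_SIZE under Sv48,
        // 0x0000_0040_0000_0000 - PAGE_SIZE under Sv39.
        "user"
    } else if KERNEL_VADDR_RANGE.contains(&va) {
        "kernel"
    } else {
        "non-canonical or reserved"
    }
}
```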
 /// Checks if the given address is page-aligned.
 pub const fn is_page_aligned(p: usize) -> bool {
     (p & (PAGE_SIZE - 1)) == 0
 }
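Finally, a couple of illustrative compile-time checks of `is_page_aligned` (assuming the usual 4 KiB `PAGE_SIZE`; not part of the patch):

```rust
use ostd::mm::is_page_aligned;

const _: () = {
    assert!(is_page_aligned(0));
    assert!(is_page_aligned(0x8000_0000)); // multiple of 4096
    assert!(!is_page_aligned(0x8000_0123)); // offset within a page
};
```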