Support RISC-V Sv39 paging mode

Zejun Zhao 2025-10-28 17:05:01 +08:00 committed by Ruihan Li
parent ff84f60489
commit 6af524b451
7 changed files with 109 additions and 47 deletions

View File

@@ -77,6 +77,8 @@ loongArch64 = "0.2.5"
 all = ["cvm_guest"]
 cvm_guest = ["dep:tdx-guest", "ostd/cvm_guest", "aster-virtio/cvm_guest"]
 coverage = ["ostd/coverage"]
+# By default we use the Sv48 address translation mode.
+riscv_sv39_mode = ["ostd/riscv_sv39_mode"]
 [lints]
 workspace = true

View File

@@ -75,6 +75,7 @@ default = ["cvm_guest"]
 # The guest OS support for Confidential VMs (CVMs), e.g., Intel TDX
 cvm_guest = ["dep:tdx-guest", "dep:iced-x86"]
 coverage = ["minicov"]
+riscv_sv39_mode = []
 [lints]
 workspace = true

View File

@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: MPL-2.0 */
+SATP_MODE_SV39 = 8 << 60
 SATP_MODE_SV48 = 9 << 60
 SATP_PPN_SHIFT = 0
@@ -22,22 +23,36 @@ _start:
 # a0 = hart id
 # a1 = device tree paddr (not touched)
-# Set up the page table.
-# boot_l4pt[511] = (PPN(boot_l3pt) << PTE_PPN_SHIFT) | PTE_V
-lla t1, boot_l4pt
+# Set up the Sv48 page table.
+# sv48_boot_l4pt[511] = (PPN(sv48_boot_l3pt) << PTE_PPN_SHIFT) | PTE_V
+lla t1, sv48_boot_l4pt
 li t0, 511 * PTE_SIZE
 add t1, t1, t0
-lla t0, boot_l3pt
+lla t0, sv48_boot_l3pt
 srli t0, t0, PAGE_SHIFT - PTE_PPN_SHIFT
 ori t0, t0, PTE_V
 sd t0, 0(t1)
-# Load the page table.
-lla t0, boot_l4pt
+# Try loading the Sv48 page table.
+lla t0, sv48_boot_l4pt
 li t1, SATP_MODE_SV48
 srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT
 or t0, t0, t1
 csrw satp, t0
+# Check if the write to satp succeeds. If not, try Sv39.
+# Reference: <https://riscv.github.io/riscv-isa-manual/snapshot/privileged/#satp>.
+csrr t1, satp
+beq t0, t1, flush_tlb
+# Try loading the Sv39 page table.
+la t0, sv39_boot_l3pt
+li t1, SATP_MODE_SV39
+srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT
+or t0, t0, t1
+csrw satp, t0
+flush_tlb:
 sfence.vma
 # Update SP/PC to use the virtual address.
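The fallback above relies on a property guaranteed by the RISC-V privileged spec: writing an unsupported MODE to satp leaves the register unchanged, so reading satp back reveals whether the mode took effect. A minimal Rust sketch of the same probe, assuming the `riscv` crate's `satp` accessors (the real check runs in assembly before any Rust code executes):

    use riscv::register::satp;

    /// Writes `mode` to `satp` and reads it back. Per the privileged spec,
    /// an implementation that does not support `mode` leaves `satp`
    /// unchanged, so the read-back tells us whether the mode is available.
    unsafe fn try_set_mode(mode: satp::Mode, root_ppn: usize) -> bool {
        unsafe { satp::set(mode, 0, root_ppn) };
        satp::read().mode() == mode
    }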
@@ -51,19 +66,38 @@ _start:
 PTE_VRWX = PTE_V | PTE_R | PTE_W | PTE_X
 .balign 4096
-boot_l4pt:
-.quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # identity 0~512 GiB
+sv48_boot_l4pt:
+.quad (0x0 << PTE_PPN_SHIFT) | PTE_VRWX # identity 0~512 GiB
 .zero 255 * PTE_SIZE
-.quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # linear 0~512 GiB
+.quad (0x0 << PTE_PPN_SHIFT) | PTE_VRWX # linear 0~512 GiB
 .zero 254 * PTE_SIZE
 .quad 0 # TBA (-> boot_l3pt)
-boot_l3pt: # 0xffff_ffff_0000_0000 -> 0x0000_0000_0000_0000
+sv48_boot_l3pt: # 0xffff_ffff_0000_0000 -> 0x0000_0000_0000_0000
 .zero 508 * PTE_SIZE
 .quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # code 0~1 GiB
 .quad (0x40000 << PTE_PPN_SHIFT) | PTE_VRWX # code 1~2 GiB
 .quad (0x80000 << PTE_PPN_SHIFT) | PTE_VRWX # code 2~3 GiB
 .quad 0
+.balign 4096
+sv39_boot_l3pt:
+.set i, 0
+.rept 128 # identity 0~128 GiB
+.quad ((i * 0x40000) << PTE_PPN_SHIFT) | PTE_VRWX
+.set i, i + 1
+.endr
+.zero 128 * PTE_SIZE
+.set i, 0
+.rept 128 # linear 0~128 GiB
+.quad ((i * 0x40000) << PTE_PPN_SHIFT) | PTE_VRWX
+.set i, i + 1
+.endr
+.zero 124 * PTE_SIZE
+.quad (0x00000 << PTE_PPN_SHIFT) | PTE_VRWX # code 0~1 GiB
+.quad (0x40000 << PTE_PPN_SHIFT) | PTE_VRWX # code 1~2 GiB
+.quad (0x80000 << PTE_PPN_SHIFT) | PTE_VRWX # code 2~3 GiB
+.quad 0
 .section ".boot.stack", "aw", @nobits
 boot_stack_bottom:
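Each `.rept` loop above emits 128 one-GiB leaf PTEs; a gigabyte spans 0x40000 base pages of 4 KiB, hence the `i * 0x40000` PPN. A Rust sketch of the same entry encoding, assuming the standard RISC-V PTE layout (V/R/W/X flags in bits 0 to 3, PPN field starting at bit 10), since the file's actual constant definitions are not shown in this diff:

    const PTE_V: u64 = 1 << 0; // valid
    const PTE_R: u64 = 1 << 1; // readable
    const PTE_W: u64 = 1 << 2; // writable
    const PTE_X: u64 = 1 << 3; // executable
    const PTE_VRWX: u64 = PTE_V | PTE_R | PTE_W | PTE_X;
    const PTE_PPN_SHIFT: u64 = 10; // PPN starts at bit 10 in a RISC-V PTE

    /// Leaf PTE identity-mapping the `i`-th gigabyte of physical memory
    /// (1 GiB = 0x40000 base pages of 4 KiB each).
    const fn gib_identity_pte(i: u64) -> u64 {
        ((i * 0x40000) << PTE_PPN_SHIFT) | PTE_VRWX
    }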

View File

@@ -26,6 +26,7 @@ mod util;
 #[derive(Clone, Debug, Default)]
 pub(crate) struct PagingConsts {}
+#[cfg(not(feature = "riscv_sv39_mode"))]
 impl PagingConstsTrait for PagingConsts {
     const BASE_PAGE_SIZE: usize = 4096;
     const NR_LEVELS: PagingLevel = 4;
@@ -35,6 +36,16 @@ impl PagingConstsTrait for PagingConsts {
     const PTE_SIZE: usize = size_of::<PageTableEntry>();
 }
+#[cfg(feature = "riscv_sv39_mode")]
+impl PagingConstsTrait for PagingConsts {
+    const BASE_PAGE_SIZE: usize = 4096;
+    const NR_LEVELS: PagingLevel = 3;
+    const ADDRESS_WIDTH: usize = 39;
+    const VA_SIGN_EXT: bool = true;
+    const HIGHEST_TRANSLATION_LEVEL: PagingLevel = 2;
+    const PTE_SIZE: usize = size_of::<PageTableEntry>();
+}
 bitflags::bitflags! {
     #[derive(Pod)]
     #[repr(C)]
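The constants line up with the translation schemes' arithmetic: each paging level resolves 9 bits of virtual address (512 eight-byte PTEs per 4-KiB table), so the address width is 12 + 9 × NR_LEVELS. A hypothetical sanity check:

    // 4-KiB pages contribute 12 offset bits; each level adds 9 index bits.
    const _: () = assert!(12 + 9 * 3 == 39); // Sv39: NR_LEVELS = 3
    const _: () = assert!(12 + 9 * 4 == 48); // Sv48: NR_LEVELS = 4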
@@ -153,8 +164,14 @@ pub(crate) struct PageTableEntry(usize);
 pub(crate) unsafe fn activate_page_table(root_paddr: Paddr, _root_pt_cache: CachePolicy) {
     assert!(root_paddr % PagingConsts::BASE_PAGE_SIZE == 0);
     let ppn = root_paddr >> 12;
+    #[cfg(not(feature = "riscv_sv39_mode"))]
+    let mode = riscv::register::satp::Mode::Sv48;
+    #[cfg(feature = "riscv_sv39_mode")]
+    let mode = riscv::register::satp::Mode::Sv39;
     unsafe {
-        riscv::register::satp::set(riscv::register::satp::Mode::Sv48, 0, ppn);
+        riscv::register::satp::set(mode, 0, ppn);
     }
 }
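For reference, the value that `satp::set` writes combines the fields as follows: MODE in bits 63:60 (8 for Sv39, 9 for Sv48, matching the `SATP_MODE_SV39 = 8 << 60` and `SATP_MODE_SV48 = 9 << 60` constants in the boot assembly), ASID in bits 59:44 (zero here), and PPN in bits 43:0. A minimal sketch of the encoding, not the `riscv` crate's actual implementation:

    /// Encodes a satp value by hand: MODE | ASID | PPN.
    /// `mode` is 8 for Sv39 or 9 for Sv48; the ASID is fixed to zero.
    fn encode_satp(mode: usize, root_paddr: usize) -> usize {
        (mode << 60) | (root_paddr >> 12)
    }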

View File

@@ -161,6 +161,13 @@ fn invoke_ffi_init_funcs() {
     }
 }
+mod feature_validation {
+    #[cfg(all(not(target_arch = "riscv64"), feature = "riscv_sv39_mode"))]
+    compile_error!(
+        "feature \"riscv_sv39_mode\" cannot be specified for architectures other than RISC-V"
+    );
+}
 /// Simple unit tests for the ktest framework.
 #[cfg(ktest)]
 mod test {

View File

@@ -57,23 +57,41 @@ use super::{
 use crate::{
     arch::mm::{PageTableEntry, PagingConsts},
     boot::memory_region::MemoryRegionType,
-    mm::{page_table::largest_pages, PagingLevel},
+    const_assert,
+    mm::{page_table::largest_pages, PagingLevel, PAGE_SIZE},
     task::disable_preempt,
 };
-/// The shortest supported address width is 39 bits. And the literal
-/// values are written for 48 bits address width. Adjust the values
-/// by arithmetic left shift.
-const ADDR_WIDTH_SHIFT: isize = PagingConsts::ADDRESS_WIDTH as isize - 48;
+// The shortest supported address width is 39 bits, so the literal
+// values below are written for a 39-bit address width and adjusted
+// by an arithmetic left shift.
+const_assert!(PagingConsts::ADDRESS_WIDTH >= 39);
+const ADDR_WIDTH_SHIFT: usize = PagingConsts::ADDRESS_WIDTH - 39;
 /// Start of the kernel address space.
 /// This is the _lowest_ address of the x86-64's _high_ canonical addresses.
 #[cfg(not(target_arch = "loongarch64"))]
-pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
 #[cfg(target_arch = "loongarch64")]
-pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
 /// End of the kernel address space (non inclusive).
-pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000 << ADDR_WIDTH_SHIFT;
+pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000;
+/// The maximum virtual address of user space (non inclusive).
+///
+/// A typical way to reserve half of the address space for the kernel is
+/// to let user space use only the lower half of the `ADDRESS_WIDTH`-bit
+/// virtual address space.
+///
+/// Also, the top page is not regarded as usable; this is a workaround
+/// for some x86_64 CPUs' bugs. See
+/// <https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/arch/x86/include/asm/page_64.h#L68-L78>
+/// for the rationale.
+pub const MAX_USERSPACE_VADDR: Vaddr = (0x0000_0040_0000_0000 << ADDR_WIDTH_SHIFT) - PAGE_SIZE;
+/// The kernel address space.
+///
+/// These are the high canonical addresses (i.e., the negative half of the
+/// address space, where the most significant address bits are set).
+pub const KERNEL_VADDR_RANGE: Range<Vaddr> = KERNEL_BASE_VADDR..KERNEL_END_VADDR;
 /// The kernel code is linear mapped to this address.
 ///
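To see the shift at work on a 64-bit target: under Sv48, ADDR_WIDTH_SHIFT is 48 − 39 = 9, and shifting the sign-extended 39-bit literals left by 9 reproduces the previous 48-bit constants; under Sv39 the shift is 0 and the literals apply as-is. A few hypothetical compile-time checks (assuming PAGE_SIZE = 4096):

    // Sv48 (shift = 9): the new literals reproduce the old 48-bit values.
    const _: () = assert!(0xffff_ffc0_0000_0000usize << 9 == 0xffff_8000_0000_0000); // KERNEL_BASE_VADDR
    const _: () = assert!((0x0000_0040_0000_0000usize << 9) - 4096 == 0x0000_8000_0000_0000 - 4096); // MAX_USERSPACE_VADDR
    // Sv39 (shift = 0): user space ends one page below 256 GiB, half of 2^39.
    const _: () = assert!(0x0000_0040_0000_0000usize - 4096 == 0x3f_ffff_f000);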
@@ -85,26 +103,26 @@ pub fn kernel_loaded_offset() {
 }
 #[cfg(target_arch = "x86_64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000;
 #[cfg(target_arch = "riscv64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000;
 #[cfg(target_arch = "loongarch64")]
-const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000;
-const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_e100_0000_0000 << ADDR_WIDTH_SHIFT;
-const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_e000_0000_0000 << ADDR_WIDTH_SHIFT;
+const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;
+const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;
 pub(in crate::mm) const FRAME_METADATA_RANGE: Range<Vaddr> =
     FRAME_METADATA_BASE_VADDR..FRAME_METADATA_CAP_VADDR;
-const VMALLOC_BASE_VADDR: Vaddr = 0xffff_c000_0000_0000 << ADDR_WIDTH_SHIFT;
+const VMALLOC_BASE_VADDR: Vaddr = 0xffff_ffe0_0000_0000 << ADDR_WIDTH_SHIFT;
 pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;
 /// The base address of the linear mapping of all physical
 /// memory in the kernel address space.
 #[cfg(not(target_arch = "loongarch64"))]
-pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
 #[cfg(target_arch = "loongarch64")]
-pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
+pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
 pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
 /// Convert physical address to virtual address using offset, only available inside `ostd`
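The remaining regions follow the same pattern: under Sv48 (shift = 9) the new literals reproduce the constants this hunk replaces, and the regions keep their ordering, with the linear mapping below vmalloc and vmalloc below the frame metadata. Hypothetical cross-checks:

    const _: () = assert!(0xffff_ffe0_0000_0000usize << 9 == 0xffff_c000_0000_0000); // VMALLOC_BASE_VADDR
    const _: () = assert!(0xffff_fff0_0000_0000usize << 9 == 0xffff_e000_0000_0000); // FRAME_METADATA_BASE_VADDR
    const _: () = assert!(0xffff_fff0_8000_0000usize << 9 == 0xffff_e100_0000_0000); // FRAME_METADATA_CAP_VADDR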

View File

@@ -22,7 +22,7 @@ pub mod vm_space;
 #[cfg(ktest)]
 mod test;
-use core::{fmt::Debug, ops::Range};
+use core::fmt::Debug;
 pub use self::{
     dma::{DmaCoherent, DmaDirection, DmaStream},
@@ -37,6 +37,7 @@ pub use self::{
         Fallible, FallibleVmRead, FallibleVmWrite, Infallible, PodAtomic, PodOnce, VmIo, VmIoFill,
         VmIoOnce, VmReader, VmWriter,
     },
+    kspace::{KERNEL_VADDR_RANGE, MAX_USERSPACE_VADDR},
     mem_obj::{HasDaddr, HasPaddr, HasPaddrRange, HasSize},
     page_prop::{CachePolicy, PageFlags, PageProperty},
     vm_space::VmSpace,
@@ -116,24 +117,6 @@ pub(crate) const fn nr_base_per_page<C: PagingConstsTrait>(level: PagingLevel) -> usize {
     page_size::<C>(level) / C::BASE_PAGE_SIZE
 }
-/// The maximum virtual address of user space (non inclusive).
-///
-/// Typical 64-bit systems have at least 48-bit virtual address space.
-/// A typical way to reserve half of the address space for the kernel is
-/// to use the highest 48-bit virtual address space.
-///
-/// Also, the top page is not regarded as usable since it's a workaround
-/// for some x86_64 CPUs' bugs. See
-/// <https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/arch/x86/include/asm/page_64.h#L68-L78>
-/// for the rationale.
-pub const MAX_USERSPACE_VADDR: Vaddr = 0x0000_8000_0000_0000 - PAGE_SIZE;
-/// The kernel address space.
-///
-/// There are the high canonical addresses defined in most 48-bit width
-/// architectures.
-pub const KERNEL_VADDR_RANGE: Range<Vaddr> = 0xffff_8000_0000_0000..0xffff_ffff_ffff_0000;
 /// Checks if the given address is page-aligned.
 pub const fn is_page_aligned(p: usize) -> bool {
     (p & (PAGE_SIZE - 1)) == 0