Make TLB flush policy relaxed

Zhang Junyang 2024-09-24 22:45:29 +08:00 committed by Tate, Hongliang Tian
parent 52f1787d35
commit fad39fdf7a
6 changed files with 302 additions and 171 deletions


@@ -16,7 +16,7 @@ use align_ext::AlignExt;
use aster_rights::Rights;
use ostd::{
cpu::CpuExceptionInfo,
mm::{PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
mm::{tlb::TlbFlushOp, PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
};
use self::{
@@ -706,6 +706,8 @@ impl Vmar_ {
};
new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
}
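// `copy_from` does not flush the TLB by itself, so issue a single flush-all
// on the source `VmSpace` once all the mappings have been copied.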
cur_cursor.flusher().issue_tlb_flush(TlbFlushOp::All);
cur_cursor.flusher().dispatch_tlb_flush();
}
drop(new_inner);


@@ -11,7 +11,8 @@ use core::{
use align_ext::AlignExt;
use aster_rights::Rights;
use ostd::mm::{
vm_space::VmItem, CachePolicy, Frame, FrameAllocOptions, PageFlags, PageProperty, VmSpace,
tlb::TlbFlushOp, vm_space::VmItem, CachePolicy, Frame, FrameAllocOptions, PageFlags,
PageProperty, VmSpace,
};
use super::{interval::Interval, is_intersected, Vmar, Vmar_};
@@ -224,7 +225,7 @@ impl VmMapping {
match cursor.query().unwrap() {
VmItem::Mapped {
va: _,
va,
frame,
mut prop,
} if is_write => {
@@ -245,7 +246,9 @@
let new_flags = PageFlags::W | PageFlags::ACCESSED | PageFlags::DIRTY;
if self.is_shared || only_reference {
cursor.protect(PAGE_SIZE, |p| p.flags |= new_flags);
cursor.protect_next(PAGE_SIZE, |p| p.flags |= new_flags);
cursor.flusher().issue_tlb_flush(TlbFlushOp::Address(va));
cursor.flusher().dispatch_tlb_flush();
} else {
let new_frame = duplicate_frame(&frame)?;
prop.flags |= new_flags;
@@ -558,7 +561,15 @@ impl VmMappingInner {
debug_assert!(range.start % PAGE_SIZE == 0);
debug_assert!(range.end % PAGE_SIZE == 0);
let mut cursor = vm_space.cursor_mut(&range).unwrap();
cursor.protect(range.len(), |p| p.flags = perms.into());
let op = |p: &mut PageProperty| p.flags = perms.into();
while cursor.virt_addr() < range.end {
if let Some(va) = cursor.protect_next(range.end - cursor.virt_addr(), op) {
cursor.flusher().issue_tlb_flush(TlbFlushOp::Range(va));
} else {
break;
}
}
cursor.flusher().dispatch_tlb_flush();
Ok(())
}


@@ -18,6 +18,7 @@ pub(crate) mod page;
pub(crate) mod page_prop;
pub(crate) mod page_table;
pub mod stat;
pub mod tlb;
pub mod vm_space;
use core::{fmt::Debug, ops::Range};


@@ -823,10 +823,6 @@ where
}
}
pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
&self.0.preempt_guard
}
/// Goes down a level assuming the current slot is absent.
///
/// This method will create a new child page table node and go down to it.

ostd/src/mm/tlb.rs (new file, 222 lines)

@@ -0,0 +1,222 @@
// SPDX-License-Identifier: MPL-2.0
//! TLB flush operations.
use alloc::vec::Vec;
use core::ops::Range;
use super::{page::DynPage, Vaddr, PAGE_SIZE};
use crate::{
cpu::{CpuSet, PinCurrentCpu},
cpu_local,
sync::SpinLock,
task::disable_preempt,
};
/// A TLB flusher that is aware of which CPUs need to be flushed.
///
/// The flusher needs to stick to the current CPU.
pub struct TlbFlusher<G: PinCurrentCpu> {
target_cpus: CpuSet,
// It is better to store these here, since loading and counting them from the
// CPU list brings non-trivial overhead.
need_remote_flush: bool,
need_self_flush: bool,
_pin_current: G,
}
impl<G: PinCurrentCpu> TlbFlusher<G> {
/// Creates a new TLB flusher with the specified CPUs to be flushed.
///
/// The flusher needs to stick to the current CPU, so please provide a
/// guard that implements [`PinCurrentCpu`].
pub fn new(target_cpus: CpuSet, pin_current_guard: G) -> Self {
let current_cpu = pin_current_guard.current_cpu();
let mut need_self_flush = false;
let mut need_remote_flush = false;
for cpu in target_cpus.iter() {
if cpu == current_cpu {
need_self_flush = true;
} else {
need_remote_flush = true;
}
}
Self {
target_cpus,
need_remote_flush,
need_self_flush,
_pin_current: pin_current_guard,
}
}
/// Issues a pending TLB flush request.
///
/// On SMP systems, the notification is sent to all the relevant CPUs only
/// when [`Self::dispatch_tlb_flush`] is called.
pub fn issue_tlb_flush(&self, op: TlbFlushOp) {
self.issue_tlb_flush_(op, None);
}
/// Dispatches all the pending TLB flush requests.
///
/// The pending requests are issued by [`Self::issue_tlb_flush`].
pub fn dispatch_tlb_flush(&self) {
if !self.need_remote_flush {
return;
}
crate::smp::inter_processor_call(&self.target_cpus, do_remote_flush);
}
/// Issues a TLB flush request that must happen before dropping the page.
///
/// If we need to remove a mapped page from the page table, we can only
/// recycle the page after all the relevant TLB entries on all CPUs have been
/// flushed. Otherwise, if the page is recycled for other purposes, user-space
/// programs could still access the page through stale TLB entries. This
/// method is designed to be used in such cases.
pub fn issue_tlb_flush_with(&self, op: TlbFlushOp, drop_after_flush: DynPage) {
self.issue_tlb_flush_(op, Some(drop_after_flush));
}
/// Whether the TLB flusher needs to flush the TLB entries on other CPUs.
pub fn need_remote_flush(&self) -> bool {
self.need_remote_flush
}
/// Whether the TLB flusher needs to flush the TLB entries on the current CPU.
pub fn need_self_flush(&self) -> bool {
self.need_self_flush
}
fn issue_tlb_flush_(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
let op = op.optimize_for_large_range();
// Fast path for single-CPU cases.
if !self.need_remote_flush {
if self.need_self_flush {
op.perform_on_current();
}
return;
}
// Slow path for multi-CPU cases.
for cpu in self.target_cpus.iter() {
let mut op_queue = FLUSH_OPS.get_on_cpu(cpu).lock();
if let Some(drop_after_flush) = drop_after_flush.clone() {
PAGE_KEEPER.get_on_cpu(cpu).lock().push(drop_after_flush);
}
op_queue.push(op.clone());
}
}
}
/// The operation to flush TLB entries.
#[derive(Debug, Clone)]
pub enum TlbFlushOp {
/// Flush all TLB entries except for the global entries.
All,
/// Flush the TLB entry for the specified virtual address.
Address(Vaddr),
/// Flush the TLB entries for the specified virtual address range.
Range(Range<Vaddr>),
}
impl TlbFlushOp {
/// Performs the TLB flush operation on the current CPU.
pub fn perform_on_current(&self) {
use crate::arch::mm::{
tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
};
match self {
TlbFlushOp::All => tlb_flush_all_excluding_global(),
TlbFlushOp::Address(addr) => tlb_flush_addr(*addr),
TlbFlushOp::Range(range) => tlb_flush_addr_range(range),
}
}
fn optimize_for_large_range(self) -> Self {
match self {
TlbFlushOp::Range(range) => {
if range.len() > FLUSH_ALL_RANGE_THRESHOLD {
TlbFlushOp::All
} else {
TlbFlushOp::Range(range)
}
}
_ => self,
}
}
}
// The queues of pending requests on each CPU.
//
// Lock ordering: lock FLUSH_OPS before PAGE_KEEPER.
cpu_local! {
static FLUSH_OPS: SpinLock<OpsStack> = SpinLock::new(OpsStack::new());
static PAGE_KEEPER: SpinLock<Vec<DynPage>> = SpinLock::new(Vec::new());
}
fn do_remote_flush() {
let preempt_guard = disable_preempt();
let current_cpu = preempt_guard.current_cpu();
let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
op_queue.flush_all();
PAGE_KEEPER.get_on_cpu(current_cpu).lock().clear();
}
/// If a TLB flush request covers a range larger than this threshold, we
/// flush all TLB entries instead of the range.
pub(crate) const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;
/// If the number of pending requests exceeds this threshold, we flush all the
/// TLB entries instead of flushing them one by one.
const FLUSH_ALL_OPS_THRESHOLD: usize = 32;
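// For instance, with 4 KiB base pages (the typical `PAGE_SIZE`), the range
// threshold above equals 128 KiB, and a CPU that accumulates more than 32
// pending requests falls back to a single flush-all.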
struct OpsStack {
ops: [Option<TlbFlushOp>; FLUSH_ALL_OPS_THRESHOLD],
need_flush_all: bool,
size: usize,
}
impl OpsStack {
const fn new() -> Self {
const ARRAY_REPEAT_VALUE: Option<TlbFlushOp> = None;
Self {
ops: [ARRAY_REPEAT_VALUE; FLUSH_ALL_OPS_THRESHOLD],
need_flush_all: false,
size: 0,
}
}
fn push(&mut self, op: TlbFlushOp) {
if self.need_flush_all {
return;
}
if self.size < FLUSH_ALL_OPS_THRESHOLD {
self.ops[self.size] = Some(op);
self.size += 1;
} else {
self.need_flush_all = true;
self.size = 0;
}
}
fn flush_all(&mut self) {
if self.need_flush_all {
crate::arch::mm::tlb_flush_all_excluding_global();
self.need_flush_all = false;
} else {
for i in 0..self.size {
if let Some(op) = &self.ops[i] {
op.perform_on_current();
}
}
}
self.size = 0;
}
}
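A minimal usage sketch of the new flusher API, not part of this commit. The function name is made up, and it assumes crate-internal code (e.g. somewhere under ostd::mm, since `DynPage` is crate-private) that already knows the target `CpuSet` and has just taken a page out of a page table.

use crate::{
    cpu::CpuSet,
    mm::{
        page::DynPage,
        tlb::{TlbFlushOp, TlbFlusher},
        Vaddr, PAGE_SIZE,
    },
    task::disable_preempt,
};

fn flush_after_unmapping(targets: CpuSet, va: Vaddr, unmapped: DynPage) {
    // Stay pinned to the current CPU for the flusher's whole lifetime.
    let flusher = TlbFlusher::new(targets, disable_preempt());
    // Keep `unmapped` alive until every target CPU has flushed its entry.
    flusher.issue_tlb_flush_with(TlbFlushOp::Address(va), unmapped);
    // Further requests are merely queued per target CPU...
    flusher.issue_tlb_flush(TlbFlushOp::Range(va + PAGE_SIZE..va + 2 * PAGE_SIZE));
    // ...and remote CPUs are interrupted only once, here.
    flusher.dispatch_tlb_flush();
}

When the target set contains only the current CPU, the issued operations are performed locally right away and the final dispatch is a no-op, which is why issuing eagerly and dispatching once stays cheap.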


@@ -9,30 +9,25 @@
//! powerful concurrent accesses to the page table, and suffers from the same
//! validity concerns as described in [`super::page_table::cursor`].
use alloc::collections::vec_deque::VecDeque;
use core::{
ops::Range,
sync::atomic::{AtomicPtr, Ordering},
};
use super::{
io::Fallible,
kspace::KERNEL_PAGE_TABLE,
page::DynPage,
page_table::{PageTable, UserMode},
PageProperty, VmReader, VmWriter, PAGE_SIZE,
};
use crate::{
arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
cpu::{num_cpus, CpuExceptionInfo, CpuSet, PinCurrentCpu},
cpu_local,
mm::{
page_table::{self, PageTableItem},
Frame, MAX_USERSPACE_VADDR,
io::Fallible,
kspace::KERNEL_PAGE_TABLE,
page_table::{self, PageTable, PageTableItem, UserMode},
tlb::{TlbFlushOp, TlbFlusher, FLUSH_ALL_RANGE_THRESHOLD},
Frame, PageProperty, VmReader, VmWriter, MAX_USERSPACE_VADDR,
},
prelude::*,
sync::{RwLock, RwLockReadGuard, SpinLock},
task::disable_preempt,
sync::{RwLock, RwLockReadGuard},
task::{disable_preempt, DisabledPreemptGuard},
Error,
};
@@ -96,11 +91,7 @@ impl VmSpace {
Ok(self.pt.cursor_mut(va).map(|pt_cursor| {
let activation_lock = self.activation_lock.read();
let cur_cpu = pt_cursor.preempt_guard().current_cpu();
let mut activated_cpus = CpuSet::new_empty();
let mut need_self_flush = false;
let mut need_remote_flush = false;
for cpu in 0..num_cpus() {
// The activation lock is held; other CPUs cannot activate this `VmSpace`.
@@ -108,20 +99,13 @@ impl VmSpace {
ACTIVATED_VM_SPACE.get_on_cpu(cpu).load(Ordering::Relaxed) as *const VmSpace;
if ptr == self as *const VmSpace {
activated_cpus.add(cpu);
if cpu == cur_cpu {
need_self_flush = true;
} else {
need_remote_flush = true;
}
}
}
CursorMut {
pt_cursor,
activation_lock,
activated_cpus,
need_remote_flush,
need_self_flush,
flusher: TlbFlusher::new(activated_cpus, disable_preempt()),
}
})?)
}
@@ -264,12 +248,9 @@ pub struct CursorMut<'a, 'b> {
pt_cursor: page_table::CursorMut<'a, UserMode, PageTableEntry, PagingConsts>,
#[allow(dead_code)]
activation_lock: RwLockReadGuard<'b, ()>,
// Better to store them here since loading and counting them from the CPUs
// list brings non-trivial overhead. We have a read lock so the stored set
// is always a superset of actual activated CPUs.
activated_cpus: CpuSet,
need_remote_flush: bool,
need_self_flush: bool,
// We have a read lock so the CPU set in the flusher is always a superset
// of the actually activated CPUs.
flusher: TlbFlusher<DisabledPreemptGuard>,
}
impl CursorMut<'_, '_> {
@@ -298,6 +279,11 @@ impl CursorMut<'_, '_> {
self.pt_cursor.virt_addr()
}
/// Get the dedicated TLB flusher for this cursor.
pub fn flusher(&self) -> &TlbFlusher<DisabledPreemptGuard> {
&self.flusher
}
/// Map a frame into the current slot.
///
/// This method will bring the cursor to the next slot after the modification.
@@ -306,9 +292,10 @@ impl CursorMut<'_, '_> {
// SAFETY: It is safe to map untyped memory into the userspace.
let old = unsafe { self.pt_cursor.map(frame.into(), prop) };
if old.is_some() {
self.issue_tlb_flush(TlbFlushOp::Address(start_va), old);
self.dispatch_tlb_flush();
if let Some(old) = old {
self.flusher
.issue_tlb_flush_with(TlbFlushOp::Address(start_va), old);
self.flusher.dispatch_tlb_flush();
}
}
@@ -320,25 +307,31 @@ impl CursorMut<'_, '_> {
/// Already-absent mappings encountered by the cursor will be skipped. It
/// is valid to unmap a range that is not mapped.
///
/// This method will issue and dispatch a TLB flush after the operation;
/// otherwise, memory safety would be compromised. To keep the TLB flush
/// overhead low, call this method sparingly: unmapping one large `len` is
/// wiser than splitting the operation into multiple small ones.
///
/// # Panics
///
/// This method will panic if `len` is not page-aligned.
pub fn unmap(&mut self, len: usize) {
assert!(len % super::PAGE_SIZE == 0);
let end_va = self.virt_addr() + len;
let tlb_prefer_flush_all = len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE;
let tlb_prefer_flush_all = len > FLUSH_ALL_RANGE_THRESHOLD;
loop {
// SAFETY: It is safe to un-map memory in the userspace.
let result = unsafe { self.pt_cursor.take_next(end_va - self.virt_addr()) };
match result {
PageTableItem::Mapped { va, page, .. } => {
if !self.need_remote_flush && tlb_prefer_flush_all {
if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
// Only in single-CPU cases can we drop the page immediately, before flushing.
drop(page);
continue;
}
self.issue_tlb_flush(TlbFlushOp::Address(va), Some(page));
self.flusher
.issue_tlb_flush_with(TlbFlushOp::Address(va), page);
}
PageTableItem::NotMapped { .. } => {
break;
@@ -349,41 +342,43 @@ impl CursorMut<'_, '_> {
}
}
if !self.need_remote_flush && tlb_prefer_flush_all {
self.issue_tlb_flush(TlbFlushOp::All, None);
if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
self.flusher.issue_tlb_flush(TlbFlushOp::All);
}
self.dispatch_tlb_flush();
self.flusher.dispatch_tlb_flush();
}
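A caller-side sketch, not part of this diff: a single large `unmap` call lets the cursor batch its flush requests (or fall back to one flush-all) and dispatch them exactly once. The helper name is made up; it assumes a page-aligned user-space `range` and an `ostd` `VmSpace`.

use core::ops::Range;
use ostd::mm::{Vaddr, VmSpace};

fn unmap_region(vm_space: &VmSpace, range: Range<Vaddr>) {
    let mut cursor = vm_space.cursor_mut(&range).unwrap();
    // One call for the whole range: `unmap` issues the needed TLB flush
    // requests internally and dispatches them before returning.
    cursor.unmap(range.len());
}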
/// Change the mapping property starting from the current slot.
/// Applies the operation to the next mapped slot within the range.
///
/// This method will bring the cursor forward by `len` bytes in the virtual
/// address space after the modification.
/// The range to search in starts at the cursor's current virtual address
/// and spans the provided length.
///
/// The way to change the property is specified by the closure `op`.
/// The function stops and yields the range it actually protected as soon as
/// it has protected a page, even if subsequent pages within the range also
/// need to be protected.
///
/// It also moves the cursor forward to the next page after the protected
/// one. If no mapped page exists in the rest of the range, the cursor stops
/// at the end of the range and [`None`] is returned.
///
/// Note that it will **NOT** flush the TLB after the operation. It is up to
/// the caller to decide when and how to flush the TLB using
/// [`Self::flusher`].
///
/// # Panics
///
/// This method will panic if `len` is not page-aligned.
pub fn protect(&mut self, len: usize, mut op: impl FnMut(&mut PageProperty)) {
assert!(len % super::PAGE_SIZE == 0);
let end = self.virt_addr() + len;
let tlb_prefer_flush_all = len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE;
/// This function will panic if:
/// - the range to be protected is out of the range where the cursor
/// is required to operate;
/// - the specified virtual address range only covers a part of a page.
pub fn protect_next(
&mut self,
len: usize,
mut op: impl FnMut(&mut PageProperty),
) -> Option<Range<Vaddr>> {
// SAFETY: It is safe to protect memory in the userspace.
while let Some(range) =
unsafe { self.pt_cursor.protect_next(end - self.virt_addr(), &mut op) }
{
if !tlb_prefer_flush_all {
self.issue_tlb_flush(TlbFlushOp::Range(range), None);
}
}
if tlb_prefer_flush_all {
self.issue_tlb_flush(TlbFlushOp::All, None);
}
self.dispatch_tlb_flush();
unsafe { self.pt_cursor.protect_next(len, &mut op) }
}
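A sketch of the intended calling pattern, not part of this diff; it mirrors the VMAR `protect` loop earlier in this commit. The helper name is made up, the closure simply overwrites the flags with read-only permission for illustration, and the range is assumed to be page-aligned user-space memory.

use core::ops::Range;
use ostd::mm::{tlb::TlbFlushOp, PageFlags, PageProperty, Vaddr, VmSpace};

fn write_protect(vm_space: &VmSpace, range: Range<Vaddr>) {
    let mut cursor = vm_space.cursor_mut(&range).unwrap();
    let op = |prop: &mut PageProperty| prop.flags = PageFlags::R;
    while cursor.virt_addr() < range.end {
        // Protect the next mapped page, skipping over holes.
        match cursor.protect_next(range.end - cursor.virt_addr(), op) {
            Some(protected) => cursor.flusher().issue_tlb_flush(TlbFlushOp::Range(protected)),
            None => break,
        }
    }
    // Interrupt remote CPUs only once, after all requests are queued.
    cursor.flusher().dispatch_tlb_flush();
}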
/// Copies the mapping from the given cursor to the current cursor.
@@ -395,6 +390,10 @@ impl CursorMut<'_, '_> {
///
/// After the operation, both cursors will advance by the specified length.
///
/// Note that it will **NOT** flush the TLB after the operation. It is up to
/// the caller to decide when and how to flush the TLB using the source's
/// [`CursorMut::flusher`].
///
/// # Panics
///
/// This function will panic if:
@@ -409,81 +408,13 @@ impl CursorMut<'_, '_> {
len: usize,
op: &mut impl FnMut(&mut PageProperty),
) {
let va = src.virt_addr();
// SAFETY: Operations on user memory spaces are safe as long as they don't
// involve dropping any pages.
unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) };
if len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE {
src.issue_tlb_flush(TlbFlushOp::All, None);
} else {
src.issue_tlb_flush(TlbFlushOp::Range(va..va + len), None);
}
src.dispatch_tlb_flush();
}
fn issue_tlb_flush(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
let request = TlbFlushRequest {
op,
drop_after_flush,
};
// Fast path for single CPU cases.
if !self.need_remote_flush {
if self.need_self_flush {
request.do_flush();
}
return;
}
// Slow path for multi-CPU cases.
for cpu in self.activated_cpus.iter() {
let mut queue = TLB_FLUSH_REQUESTS.get_on_cpu(cpu).lock();
queue.push_back(request.clone());
}
}
fn dispatch_tlb_flush(&self) {
if !self.need_remote_flush {
return;
}
fn do_remote_flush() {
let preempt_guard = disable_preempt();
let mut requests = TLB_FLUSH_REQUESTS
.get_on_cpu(preempt_guard.current_cpu())
.lock();
if requests.len() > TLB_FLUSH_ALL_THRESHOLD {
// TODO: in most cases, we need only to flush all the TLB entries
// for an ASID if it is enabled.
crate::arch::mm::tlb_flush_all_excluding_global();
requests.clear();
} else {
while let Some(request) = requests.pop_front() {
request.do_flush();
if matches!(request.op, TlbFlushOp::All) {
requests.clear();
break;
}
}
}
}
crate::smp::inter_processor_call(&self.activated_cpus.clone(), do_remote_flush);
unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) }
}
}
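A sketch of the caller-side contract of `copy_from`, not part of this diff; it mirrors the VMAR fork path at the top of this commit. The helper name is made up, and both cursors are assumed to cover page-aligned ranges of the same length; `copy_from` itself never flushes, so the source cursor's flusher is used afterwards.

use ostd::mm::{tlb::TlbFlushOp, vm_space::CursorMut, PageProperty};

fn fork_mappings<'a, 'b>(
    dst: &mut CursorMut<'a, 'b>,
    src: &mut CursorMut<'a, 'b>,
    len: usize,
    op: &mut impl FnMut(&mut PageProperty),
) {
    // Copy `len` bytes of mappings, applying `op` to the page properties
    // (e.g. dropping the write bit to set up copy-on-write).
    dst.copy_from(src, len, op);
    // The copy does not flush the TLB; flush through the source's flusher.
    src.flusher().issue_tlb_flush(TlbFlushOp::All);
    src.flusher().dispatch_tlb_flush();
}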
/// The threshold used to determine whether we need to flush all TLB entries
/// when handling a bunch of TLB flush requests. If the number of requests
/// exceeds this threshold, the overhead incurred by flushing pages
/// individually would surpass the overhead of flushing all entries at once.
const TLB_FLUSH_ALL_THRESHOLD: usize = 32;
cpu_local! {
/// The queue of pending requests.
static TLB_FLUSH_REQUESTS: SpinLock<VecDeque<TlbFlushRequest>> = SpinLock::new(VecDeque::new());
/// The `Arc` pointer to the activated VM space on this CPU. If the pointer
/// is NULL, it means that the activated page table is merely the kernel
/// page table.
@@ -493,38 +424,6 @@ cpu_local! {
static ACTIVATED_VM_SPACE: AtomicPtr<VmSpace> = AtomicPtr::new(core::ptr::null_mut());
}
#[derive(Debug, Clone)]
struct TlbFlushRequest {
op: TlbFlushOp,
// If we need to remove a mapped page from the page table, we can only
// recycle the page after all the relevant TLB entries in all CPUs are
// flushed. Otherwise if the page is recycled for other purposes, the user
// space program can still access the page through the TLB entries.
#[allow(dead_code)]
drop_after_flush: Option<DynPage>,
}
#[derive(Debug, Clone)]
enum TlbFlushOp {
All,
Address(Vaddr),
Range(Range<Vaddr>),
}
impl TlbFlushRequest {
/// Perform the TLB flush operation on the current CPU.
fn do_flush(&self) {
use crate::arch::mm::{
tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
};
match &self.op {
TlbFlushOp::All => tlb_flush_all_excluding_global(),
TlbFlushOp::Address(addr) => tlb_flush_addr(*addr),
TlbFlushOp::Range(range) => tlb_flush_addr_range(range),
}
}
}
/// The result of a query over the VM space.
#[derive(Debug)]
pub enum VmItem {