diff --git a/ostd/src/boot/smp.rs b/ostd/src/boot/smp.rs
index d5f957e87..c3f32275e 100644
--- a/ostd/src/boot/smp.rs
+++ b/ostd/src/boot/smp.rs
@@ -160,12 +160,18 @@ pub(crate) unsafe extern "C" fn ap_early_entry(cpu_id: u32) -> ! {
     // 3. No remaining `with_borrow` invocations on this CPU from now on.
     unsafe { crate::mm::page_table::boot_pt::dismiss() };
 
-    // Mark the AP as started.
-    report_online_and_hw_cpu_id(cpu_id);
-    log::info!("Processor {} started. Spinning for tasks.", cpu_id);
+    // Mark the AP as started. The BSP will resume execution once all the APs
+    // have been marked as such.
+    //
+    // From here until the `tlb_flush_all_excluding_global` call below, TLB
+    // coherence is not guaranteed, because the BSP may be unable to send IPIs
+    // to flush this AP's TLB. Do not perform complex operations in this window.
+    report_online_and_hw_cpu_id(cpu_id);
 
     let ap_late_entry = AP_LATE_ENTRY.wait();
 
+    crate::arch::mm::tlb_flush_all_excluding_global();
+
     ap_late_entry();
 
     Task::yield_now();
diff --git a/ostd/src/io/io_mem/mod.rs b/ostd/src/io/io_mem/mod.rs
index 123d7bc49..eb307cf26 100644
--- a/ostd/src/io/io_mem/mod.rs
+++ b/ostd/src/io/io_mem/mod.rs
@@ -15,13 +15,16 @@ pub(crate) use self::allocator::IoMemAllocatorBuilder;
 pub(super) use self::allocator::init;
 use crate::{
     Error,
+    cpu::{AtomicCpuSet, CpuSet},
     mm::{
         HasPaddr, HasSize, Infallible, PAGE_SIZE, Paddr, PodOnce, VmReader, VmWriter,
         io_util::{HasVmReaderWriter, VmReaderWriterIdentity},
         kspace::kvirt_area::KVirtArea,
         page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
+        tlb::{TlbFlushOp, TlbFlusher},
     },
     prelude::*,
+    task::disable_preempt,
 };
 
 /// A marker type used for [`IoMem`],
@@ -115,9 +118,22 @@ impl IoMem {
             priv_flags,
         };
 
-        // SAFETY: The caller of `IoMem::new()` ensures that the given
-        // physical address range is I/O memory, so it is safe to map.
-        let kva = unsafe { KVirtArea::map_untracked_frames(area_size, 0, frames_range, prop) };
+        let kva = {
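+            // The virtual range backing this mapping may have been reused, so
+            // stale TLB entries could remain on some CPUs; hence the explicit
+            // all-CPU flush right after the mapping below.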
+            // SAFETY: The caller of `IoMem::new()` ensures that the given
+            // physical address range is I/O memory, so it is safe to map.
+            let kva = unsafe { KVirtArea::map_untracked_frames(area_size, 0, frames_range, prop) };
+
+            let target_cpus = AtomicCpuSet::new(CpuSet::new_full());
+            let mut flusher = TlbFlusher::new(&target_cpus, disable_preempt());
+            flusher.issue_tlb_flush(TlbFlushOp::for_range(kva.range()));
+            flusher.dispatch_tlb_flush();
+            flusher.sync_tlb_flush();
+
+            kva
+        };
 
         Self {
             kvirt_area: Arc::new(kva),
diff --git a/ostd/src/mm/tlb.rs b/ostd/src/mm/tlb.rs
index 054ffae83..c9ef01149 100644
--- a/ostd/src/mm/tlb.rs
+++ b/ostd/src/mm/tlb.rs
@@ -18,6 +18,7 @@ use crate::{
     const_assert,
     cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
     cpu_local,
+    smp::IpiSender,
     sync::{LocalIrqDisabled, SpinLock},
 };
 
@@ -28,6 +29,7 @@ pub struct TlbFlusher<'a, G: PinCurrentCpu> {
     target_cpus: &'a AtomicCpuSet,
     have_unsynced_flush: CpuSet,
     ops_stack: OpsStack,
+    ipi_sender: Option<&'static IpiSender>,
     _pin_current: G,
 }
 
@@ -44,6 +46,7 @@ impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
             target_cpus,
             have_unsynced_flush: CpuSet::new_empty(),
             ops_stack: OpsStack::new(),
+            ipi_sender: crate::smp::IPI_SENDER.get(),
             _pin_current: pin_current_guard,
         }
     }
@@ -97,20 +100,20 @@
             need_flush_on_self = true;
         }
 
-        for cpu in target_cpus.iter() {
-            {
+        if let Some(ipi_sender) = self.ipi_sender {
+            for cpu in target_cpus.iter() {
+                self.have_unsynced_flush.add(cpu);
+
                 let mut flush_ops = FLUSH_OPS.get_on_cpu(cpu).lock();
                 flush_ops.push_from(&self.ops_stack);
-
                 // Clear ACK before dropping the lock to avoid false ACKs.
                 ACK_REMOTE_FLUSH
                     .get_on_cpu(cpu)
                     .store(false, Ordering::Relaxed);
             }
-            self.have_unsynced_flush.add(cpu);
-        }
 
-        crate::smp::inter_processor_call(&target_cpus, do_remote_flush);
+            ipi_sender.inter_processor_call(&target_cpus, do_remote_flush);
+        }
 
         // Flush ourselves after sending all IPIs to save some time.
         if need_flush_on_self {
@@ -136,6 +139,12 @@
     /// processed in IRQs, two CPUs may deadlock if they are waiting for each
     /// other's TLB coherence.
     pub fn sync_tlb_flush(&mut self) {
+        if self.ipi_sender.is_none() {
+            // These TLB flushes were issued in the boot context, so each AP's
+            // boot process will take care of them (see `ap_early_entry`).
+            return;
+        }
+
         assert!(
             irq::is_local_enabled(),
             "Waiting for remote flush with IRQs disabled"
diff --git a/ostd/src/power.rs b/ostd/src/power.rs
index 55d39bb49..81cee94e5 100644
--- a/ostd/src/power.rs
+++ b/ostd/src/power.rs
@@ -2,11 +2,9 @@
 //! Power management.
 
-use core::sync::atomic::Ordering;
-
 use spin::Once;
 
-use crate::{arch::irq::disable_local_and_halt, cpu::CpuSet, smp::inter_processor_call};
+use crate::{arch::irq::disable_local_and_halt, cpu::CpuSet};
 
 /// An exit code that denotes the reason for restarting or powering off.
 ///
@@ -87,10 +85,12 @@ pub fn poweroff(code: ExitCode) -> ! {
 fn machine_halt() -> ! {
     log::error!("Halting the machine...");
 
-    // TODO: `inter_processor_call` may panic again (e.g., if IPIs have not been initialized or if
-    // there is an out-of-memory error). We should find a way to make it panic-free.
-    if !crate::IN_BOOTSTRAP_CONTEXT.load(Ordering::Relaxed) {
-        inter_processor_call(&CpuSet::new_full(), || disable_local_and_halt());
+    // TODO: `inter_processor_call` may panic again (e.g., if there is an out-of-memory error). We
+    // should find a way to make it panic-free.
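+    // `IPI_SENDER` being uninitialized means that IPIs are unavailable (e.g.,
+    // in the boot context), in which case only the local CPU is halted below.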
+    if let Some(ipi_sender) = crate::smp::IPI_SENDER.get() {
+        ipi_sender.inter_processor_call(&CpuSet::new_full(), || disable_local_and_halt());
     }
 
     disable_local_and_halt();
 }
diff --git a/ostd/src/smp.rs b/ostd/src/smp.rs
index a98c333c3..e35c00310 100644
--- a/ostd/src/smp.rs
+++ b/ostd/src/smp.rs
@@ -32,37 +32,52 @@ use crate::{
 /// The function `f` will be executed asynchronously on the target processors.
 /// However if called on the current processor, it will be synchronous.
 pub fn inter_processor_call(targets: &CpuSet, f: fn()) {
-    let irq_guard = irq::disable_local();
-    let this_cpu_id = irq_guard.current_cpu();
-
-    let ipi_data = IPI_GLOBAL_DATA.get().unwrap();
-
-    let mut call_on_self = false;
-    for cpu_id in targets.iter() {
-        if cpu_id == this_cpu_id {
-            call_on_self = true;
-            continue;
-        }
-        CALL_QUEUES.get_on_cpu(cpu_id).lock().push_back(f);
-    }
-    for cpu_id in targets.iter() {
-        if cpu_id == this_cpu_id {
-            continue;
-        }
-        let hw_cpu_id = ipi_data.hw_cpu_ids[cpu_id.as_usize()];
-        crate::arch::irq::send_ipi(hw_cpu_id, &irq_guard as _);
-    }
-    if call_on_self {
-        // Execute the function synchronously.
-        f();
-    }
+    let ipi_sender = IPI_SENDER.get().unwrap();
+    ipi_sender.inter_processor_call(targets, f);
 }
 
-struct IpiGlobalData {
+/// A sender that carries the information necessary to send inter-processor interrupts.
+///
+/// The purpose of exposing this type is to enable users to check whether
+/// [`IPI_SENDER`] has been initialized.
+pub(crate) struct IpiSender {
     hw_cpu_ids: Box<[HwCpuId]>,
 }
 
-static IPI_GLOBAL_DATA: Once<IpiGlobalData> = Once::new();
+/// The [`IpiSender`] singleton.
+pub(crate) static IPI_SENDER: Once<IpiSender> = Once::new();
+
+impl IpiSender {
+    /// Executes a function on other processors.
+    ///
+    /// See [`inter_processor_call`] for details. This method lives on
+    /// [`IpiSender`] so that callers must first check whether [`IPI_SENDER`]
+    /// has been initialized.
+    pub(crate) fn inter_processor_call(&self, targets: &CpuSet, f: fn()) {
+        let irq_guard = irq::disable_local();
+        let this_cpu_id = irq_guard.current_cpu();
+
+        let mut call_on_self = false;
+        for cpu_id in targets.iter() {
+            if cpu_id == this_cpu_id {
+                call_on_self = true;
+                continue;
+            }
+            CALL_QUEUES.get_on_cpu(cpu_id).lock().push_back(f);
+        }
+        for cpu_id in targets.iter() {
+            if cpu_id == this_cpu_id {
+                continue;
+            }
+            let hw_cpu_id = self.hw_cpu_ids[cpu_id.as_usize()];
+            crate::arch::irq::send_ipi(hw_cpu_id, &irq_guard as _);
+        }
+        if call_on_self {
+            // Execute the function synchronously.
+            f();
+        }
+    }
+}
 
 cpu_local! {
     static CALL_QUEUES: SpinLock<VecDeque<fn()>> = SpinLock::new(VecDeque::new());
 }
@@ -90,9 +105,8 @@ pub(crate) unsafe fn do_inter_processor_call(_trapframe: &TrapFrame) {
 }
 
 pub(super) fn init() {
-    IPI_GLOBAL_DATA.call_once(|| {
+    IPI_SENDER.call_once(|| {
         let hw_cpu_ids = crate::boot::smp::construct_hw_cpu_id_mapping();
-
-        IpiGlobalData { hw_cpu_ids }
+        IpiSender { hw_cpu_ids }
     });
 }
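
For reviewers, here is a minimal usage sketch (not part of the change) of the pattern this diff enables: `TlbFlusher` now captures `IPI_SENDER` itself, so ostd-internal code can flush TLBs on all CPUs without consulting a bootstrap flag, mirroring the new `IoMem::new` code above. The helper name `flush_range_on_all_cpus` is hypothetical; the API calls are the ones introduced or used in this diff.

```rust
use crate::{
    cpu::{AtomicCpuSet, CpuSet},
    mm::{
        Vaddr,
        tlb::{TlbFlushOp, TlbFlusher},
    },
    task::disable_preempt,
};

/// Hypothetical ostd-internal helper: flushes a kernel virtual address range
/// on every CPU, working both before and after `smp::init`.
fn flush_range_on_all_cpus(range: core::ops::Range<Vaddr>) {
    // Target all CPUs. `TlbFlusher::new` captures `IPI_SENDER.get()`, so no
    // explicit "are IPIs ready?" check is needed at the call site.
    let target_cpus = AtomicCpuSet::new(CpuSet::new_full());
    let mut flusher = TlbFlusher::new(&target_cpus, disable_preempt());

    flusher.issue_tlb_flush(TlbFlushOp::for_range(range));
    flusher.dispatch_tlb_flush();

    // Before the APs are online, `sync_tlb_flush` returns immediately; each
    // AP later flushes its own TLB in `ap_early_entry` via
    // `tlb_flush_all_excluding_global`.
    flusher.sync_tlb_flush();
}
```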