From 659f079c2d41c2cecb3fc06ec62cbece1f67f86e Mon Sep 17 00:00:00 2001 From: "Tate, Hongliang Tian" Date: Sat, 26 Jul 2025 18:52:27 +0800 Subject: [PATCH] Revise the doc of OSTD's scheduling module --- kernel/src/sched/sched_class/fair.rs | 4 + kernel/src/sched/sched_class/mod.rs | 4 +- kernel/src/sched/sched_class/real_time.rs | 4 + ostd/src/task/scheduler/fifo_scheduler.rs | 4 +- ostd/src/task/scheduler/mod.rs | 339 +++++++++++++++++++--- 5 files changed, 318 insertions(+), 37 deletions(-) diff --git a/kernel/src/sched/sched_class/fair.rs b/kernel/src/sched/sched_class/fair.rs index f59092d03..7aceb7fdb 100644 --- a/kernel/src/sched/sched_class/fair.rs +++ b/kernel/src/sched/sched_class/fair.rs @@ -344,6 +344,10 @@ impl SchedClassRq for FairClassRq { rt.period_delta > self.time_slice(weight) || vruntime > self.min_vruntime + self.vtime_slice() } + UpdateFlags::Exit => { + // TODO: consider do more (e.g., time accounting) + true + } } } } diff --git a/kernel/src/sched/sched_class/mod.rs b/kernel/src/sched/sched_class/mod.rs index 7a0b31d38..75d4db026 100644 --- a/kernel/src/sched/sched_class/mod.rs +++ b/kernel/src/sched/sched_class/mod.rs @@ -237,7 +237,7 @@ impl Scheduler for ClassScheduler { should_preempt.then_some(cpu) } - fn local_mut_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)) { + fn mut_local_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)) { let guard = disable_local(); let mut lock = self.rqs[guard.current_cpu().as_usize()].lock(); f(&mut *lock) @@ -339,7 +339,7 @@ impl LocalRunQueue for PerCpuClassRqSet { self.current.as_ref().map(|((task, _), _)| task) } - fn pick_next_current(&mut self) -> Option<&Arc> { + fn try_pick_next(&mut self) -> Option<&Arc> { self.pick_next_entity().and_then(|next| { // We guarantee that a task can appear at once in a `PerCpuClassRqSet`. So, the `next` cannot be the same // as the current task here. diff --git a/kernel/src/sched/sched_class/real_time.rs b/kernel/src/sched/sched_class/real_time.rs index 3303c94f7..ab81d0c36 100644 --- a/kernel/src/sched/sched_class/real_time.rs +++ b/kernel/src/sched/sched_class/real_time.rs @@ -220,6 +220,10 @@ impl SchedClassRq for RealTimeClassRq { ts => ts <= rt.period_delta, }, UpdateFlags::Yield => true, + UpdateFlags::Exit => { + // TODO: consider do more (e.g., time accounting) + true + } } } } diff --git a/ostd/src/task/scheduler/fifo_scheduler.rs b/ostd/src/task/scheduler/fifo_scheduler.rs index 6fd3aed1b..484b5c588 100644 --- a/ostd/src/task/scheduler/fifo_scheduler.rs +++ b/ostd/src/task/scheduler/fifo_scheduler.rs @@ -70,7 +70,7 @@ impl Scheduler for FifoScheduler { f(local_rq); } - fn local_mut_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)) { + fn mut_local_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)) { let preempt_guard = disable_preempt(); let local_rq: &mut FifoRunQueue = &mut self.rq[preempt_guard.current_cpu().as_usize()] .disable_irq() @@ -102,7 +102,7 @@ impl LocalRunQueue for FifoRunQueue { !matches!(flags, UpdateFlags::Tick) } - fn pick_next_current(&mut self) -> Option<&Arc> { + fn try_pick_next(&mut self) -> Option<&Arc> { let next_task = self.queue.pop_front()?; if let Some(prev_task) = self.current.replace(next_task) { self.queue.push_back(prev_task); diff --git a/ostd/src/task/scheduler/mod.rs b/ostd/src/task/scheduler/mod.rs index 38d553c5b..59ea7584d 100644 --- a/ostd/src/task/scheduler/mod.rs +++ b/ostd/src/task/scheduler/mod.rs @@ -1,9 +1,69 @@ // SPDX-License-Identifier: MPL-2.0 -//! Scheduling subsystem (in-OSTD part). +//! Task scheduling. //! -//! This module defines what OSTD expects from a scheduling implementation -//! and provides useful functions for controlling the execution flow. +//! # Scheduler Injection +//! +//! The task scheduler of an OS is a complex beast, +//! and the most suitable scheduling algorithm often depends on the target usage scenario. +//! To avoid code bloat and offer flexibility, +//! OSTD does not include a gigantic, one-size-fits-all task scheduler. +//! Instead, it allows the client to implement a custom scheduler (in safe Rust, of course) +//! and register it with OSTD. +//! This feature is known as **scheduler injection**. +//! +//! The client kernel performs scheduler injection via the [`inject_scheduler`] API. +//! This API should be called as early as possible during kernel initialization, +//! before any [`Task`]-related APIs are used. +//! This requirement is reasonable since `Task`s depend on the scheduler. +//! +//! # Scheduler Abstraction +//! +//! The `inject_scheduler` API accepts an object implementing the [`Scheduler`] trait, +//! which abstracts over any SMP-aware task scheduler. +//! Whenever an OSTD client spawns a new task (via [`crate::task::TaskOptions`]) +//! or wakes a sleeping task (e.g., via [`crate::sync::Waker`]), +//! OSTD internally forwards the corresponding `Arc` +//! to the scheduler by invoking the [`Scheduler::enqueue`] method. +//! This allows the injected scheduler to manage all runnable tasks. +//! +//! Each enqueued task is dispatched to one of the per-CPU local runqueues, +//! which manage all runnable tasks on a specific CPU. +//! A local runqueue is abstracted by the [`LocalRunQueue`] trait. +//! OSTD accesses the local runqueue of the current CPU +//! via [`Scheduler::local_rq_with`] or [`Scheduler::mut_local_rq_with`], +//! which return immutable and mutable references to `dyn LocalRunQueue`, respectively. +//! +//! The [`LocalRunQueue`] trait enables OSTD to inspect and manipulate local runqueues. +//! For instance, OSTD invokes the [`LocalRunQueue::pick_next`] method +//! to let the scheduler select the next task to run. +//! OSTD then performs a context switch to that task, +//! which becomes the _current_ running task, accessible via [`LocalRunQueue::current`]. +//! When the current task is about to sleep (e.g., via [`crate::sync::Waiter`]), +//! OSTD removes it from the local runqueue using [`LocalRunQueue::dequeue_current`]. +//! +//! The interfaces of `Scheduler` and `LocalRunQueue` are simple +//! yet (perhaps surprisingly) powerful enough to support +//! even complex and advanced task scheduler implementations. +//! Scheduler implementations are free to employ any load-balancing strategy +//! to dispatch enqueued tasks across local runqueues, +//! and each local runqueue is free to choose any prioritization strategy +//! for selecting the next task to run. +//! Based on OSTD's scheduling abstractions, +//! the Asterinas kernel has successfully supported multiple Linux scheduling classes, +//! including both real-time and normal policies. +//! +//! # Safety Impact +//! +//! While OSTD delegates scheduling decisions to the injected task scheduler, +//! it verifies these decisions to avoid undefined behavior. +//! In particular, it enforces the following safety invariant: +//! +//! > A task must not be scheduled to run on more than one CPU at a time. +//! +//! Violating this invariant—e.g., running the same task on two CPUs concurrently— +//! can have catastrophic consequences, +//! as the task's stack and internal state may be corrupted by concurrent modifications. mod fifo_scheduler; pub mod info; @@ -18,14 +78,15 @@ use crate::{ timer, }; -/// Injects a scheduler implementation into framework. +/// Injects a custom implementation of task scheduler into OSTD. /// -/// This function can only be called once and must be called during the initialization of kernel. +/// This function can only be called once and must be called during the initialization phase of kernel, +/// before any [`Task`]-related APIs are invoked. pub fn inject_scheduler(scheduler: &'static dyn Scheduler) { SCHEDULER.call_once(|| scheduler); timer::register_callback(|| { - SCHEDULER.get().unwrap().local_mut_rq_with(&mut |local_rq| { + SCHEDULER.get().unwrap().mut_local_rq_with(&mut |local_rq| { if local_rq.update_current(UpdateFlags::Tick) { cpu_local::set_need_preempt(); } @@ -35,52 +96,262 @@ pub fn inject_scheduler(scheduler: &'static dyn Scheduler) { static SCHEDULER: Once<&'static dyn Scheduler> = Once::new(); -/// A per-CPU task scheduler. +/// A SMP-aware task scheduler. pub trait Scheduler: Sync + Send { /// Enqueues a runnable task. /// - /// Scheduler developers can perform load-balancing or some accounting work here. + /// The scheduler implementer can perform load-balancing or some time accounting work here. /// - /// If the `current` of a CPU needs to be preempted, this method returns the id of - /// that CPU. + /// The newly-enqueued task may have a higher priority than the currently running one on a CPU + /// and thus should preempt the latter. + /// In this case, this method returns the ID of that CPU. fn enqueue(&self, runnable: Arc, flags: EnqueueFlags) -> Option; - /// Gets an immutable access to the local runqueue of the current CPU core. + /// Gets an immutable access to the local runqueue of the current CPU. fn local_rq_with(&self, f: &mut dyn FnMut(&dyn LocalRunQueue)); - /// Gets a mutable access to the local runqueue of the current CPU core. - fn local_mut_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)); + /// Gets a mutable access to the local runqueue of the current CPU. + fn mut_local_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue)); } -/// The _local_ view of a per-CPU runqueue. +/// A per-CPU, local runqueue. /// -/// This local view provides the interface for the runqueue of a CPU core -/// to be inspected and manipulated by the code running on this particular CPU core. +/// This abstraction allows OSTD to inspect and manipulate local runqueues. /// -/// Conceptually, a local runqueue consists of two parts: -/// (1) a priority queue of runnable tasks; -/// (2) the current running task. -/// The exact definition of "priority" is left for the concrete implementation to decide. +/// Conceptually, a local runqueue maintains: +/// 1. A priority queue of runnable tasks. +/// The definition of "priority" is left to the concrete implementation. +/// 2. The current running task. +/// +/// # Interactions with OSTD +/// +/// ## Overview +/// +/// It is crucial for implementers of `LocalRunQueue` +/// to understand how OSTD interacts with local runqueues. +/// +/// A local runqueue is consulted by OSTD in response to one of four scheduling events: +/// - **Yielding**, triggered by [`Task::yield_now`], where the current task voluntarily gives up CPU time. +/// - **Sleeping**, triggered by [`crate::sync::Waiter::wait`] +/// or any synchronization primitive built upon it (e.g., [`crate::sync::WaitQueue`], [`crate::sync::Mutex`]), +/// which blocks the current task until a wake-up event occurs. +/// - **Ticking**, triggered periodically by the system timer +/// (see [`crate::arch::timer::TIMER_FREQ`]), +/// which provides an opportunity to do time accounting and consider preemption. +/// - **Exiting**, triggered when the execution logic of a task has come to an end, +/// which informs the scheduler that the task is exiting and will never be enqueued again. +/// +/// The general workflow for OSTD to handle a scheduling event is as follows: +/// 1. Acquire exclusive access to the local runqueue using [`Scheduler::mut_local_rq_with`]. +/// 2. Call [`LocalRunQueue::update_current`] to update the current task's state, +/// returning a boolean value that indicates +/// whether the current task should and can be replaced with another runnable task. +/// 3. If the task is about to sleep or exit, call [`LocalRunQueue::dequeue_current`] +/// to remove it from the runqueue. +/// 4. If the return value of `update_current` in Step 2 is true, +/// then select the next task to run with [`LocalRunQueue::pick_next`]. +/// +/// ## When to Pick the Next Task? +/// +/// As shown above, +/// OSTD guarantees that `pick_next` is only called +/// when the current task should and can be replaced. +/// This avoids unnecessary invocations and improves efficiency. +/// +/// But under what conditions should the current task be replaced? +/// Two criteria must be met: +/// 1. There exists at least one other runnable task in the runqueue. +/// 2. That task should preempt the current one, if present. +/// +/// Some implications of these rules: +/// - If the runqueue is empty, `update_current` must return `false`—there's nothing to run. +/// - If the runqueue is non-empty but the current task is absent, +/// `update_current` should return `true`—anything is better than nothing. +/// - If the runqueue is non-empty and the flag is `UpdateFlags::WAIT`, +/// `update_current` should also return `true`, +/// because the current task is about to block. +/// - In other cases, the return value depends on the scheduler's prioritization policy. +/// For instance, a real-time task may only be preempted by a higher-priority task +/// or if it explicitly yields. +/// A normal task under Linux's CFS may be preempted by a task with smaller vruntime, +/// but never by the idle task. +/// +/// When OSTD is unsure about whether the current task should or can be replaced, +/// it will invoke [`LocalRunQueue::try_pick_next`], the fallible version of `pick_next`. +/// +/// ## Intern Working +/// +/// To guide scheduler implementers, +/// we provide a simplified view of how OSTD interacts with local runqueues _internally_ +/// in order to handle the four scheduling events. +/// +/// ### Yielding +/// +/// ``` +/// # use ostd::prelude::*; +/// # use ostd::task::{*, scheduler::*}; +/// # +/// # fn switch_to(next: Arc) {} +/// # +/// /// Yields the current task. +/// fn yield(scheduler: &'static dyn Scheduler) { +/// let next_task_opt: Option> = scheduler.mut_local_rq_with(|local_rq| { +/// let should_pick_next = local_rq.update_current(UpdateFlags::Yield); +/// should_pick_next.then(|| local_rq.pick_next().clone()) +/// }); +/// let Some(next_task) = next_task_opt { +/// switch_to(next_task); +/// } +/// } +/// ``` +/// +/// ### Sleeping +/// +/// ``` +/// # use ostd::prelude::*; +/// # use ostd::task::{*, scheduler::*}; +/// # +/// # fn switch_to(next: Arc) {} +/// # +/// /// Puts the current task to sleep. +/// /// +/// /// The function takes a closure to check if the task is woken. +/// /// This function is used internally to guard against race conditions, +/// /// where the task is woken just before it goes to sleep. +/// fn sleep bool>(scheduler: &'static dyn Scheduler, is_woken: F) { +/// let mut next_task_opt: Option> = None; +/// let mut is_first_try = true; +/// while scheduler.mut_local_rq_with(|local_rq| { +/// if is_first_try { +/// if is_woken() { +/// return false; // exit loop +/// } +/// is_first_try = false; +/// +/// let should_pick_next = local_rq.update_current(UpdateFlags::Wait); +/// let _current = local_rq.dequeue_current(); +/// if !should_pick_next { +/// return true; // continue loop +/// } +/// next_task_opt = Some(local_rq.pick_next().clone()); +/// false // exit loop +/// } else { +/// next_task_opt = local_rq.try_pick_next().cloned(); +/// next_task_opt.is_none() +/// } +/// }) {} +/// let Some(next_task) = next_task_opt { +/// switch_to(next_task); +/// } +/// } +/// ``` +/// +/// ### Ticking +/// +/// ``` +/// # use ostd::prelude::*; +/// # use ostd::task::{*, scheduler::*}; +/// # +/// # fn switch_to(next: Arc) {} +/// # mod cpu_local { +/// # fn set_need_preempt(); +/// # fn should_preempt() -> bool; +/// # } +/// # +/// /// A callback to be invoked periodically by the timer interrupt. +/// fn on_tick(scheduler: &'static dyn Scheduler) { +/// scheduler.mut_local_rq_with(|local_rq| { +/// let should_pick_next = local_rq.update_current(UpdateFlags::Tick); +/// if should_pick_next { +/// cpu_local::set_need_preempt(); +/// } +/// }); +/// } +/// +/// /// A preemption point, called at an earliest convenient timing +/// /// when OSTD can safely preempt the current running task. +/// fn might_preempt(scheduler: &'static dyn Scheduler) { +/// if !cpu_local::should_preempt() { +/// return; +/// } +/// let next_task_opt: Option> = scheduler +/// .mut_local_rq_with(|local_rq| local_rq.try_pick_next().cloned()) +/// let Some(next_task) = next_task_opt { +/// switch_to(next_task); +/// } +/// } +/// ``` +/// +/// ### Exiting +/// +/// ``` +/// # use ostd::prelude::*; +/// # use ostd::task::{*, scheduler::*}; +/// # +/// # fn switch_to(next: Arc) {} +/// # +/// /// Exits the current task. +/// fn exit(scheduler: &'static dyn Scheduler) { +/// let mut next_task_opt: Option> = None; +/// let mut is_first_try = true; +/// while scheduler.mut_local_rq_with(|local_rq| { +/// if is_first_try { +/// is_first_try = false; +/// let should_pick_next = local_rq.update_current(UpdateFlags::Exit); +/// let _current = local_rq.dequeue_current(); +/// if !should_pick_next { +/// return true; // continue loop +/// } +/// next_task_opt = Some(local_rq.pick_next().clone()); +/// false // exit loop +/// } else { +/// next_task_opt = local_rq.try_pick_next().cloned(); +/// next_task_opt.is_none() +/// } +/// }) {} +/// let next_task = next_task_opt.unwrap(); +/// switch_to(next_task); +/// } +/// ``` pub trait LocalRunQueue { /// Gets the current runnable task. fn current(&self) -> Option<&Arc>; - /// Updates the current runnable task's scheduling statistics and potentially its - /// position in the queue. + /// Updates the current runnable task's scheduling statistics and + /// potentially its position in the runqueue. /// - /// If the current runnable task needs to be preempted, the method returns `true`. + /// The return value of this method indicates whether an invocation of `pick_next` should be followed + /// to find another task to replace the current one. fn update_current(&mut self, flags: UpdateFlags) -> bool; - /// Picks the next current runnable task. + /// Picks the next runnable task. /// - /// This method returns the chosen next current runnable task. If there is no - /// candidate for next current runnable task, this method returns `None`. - fn pick_next_current(&mut self) -> Option<&Arc>; + /// This method instructs the local runqueue to pick the next runnable task and replace the current one. + /// A reference to the new "current" task will be returned by this method. + /// If the "old" current task presents, then it is still runnable and thus remains in the runqueue. + /// + /// # Panics + /// + /// As explained in the type-level Rust doc, + /// this method will only be invoked by OSTD after a call to `update_current` returns true. + /// In case that this contract is broken by the caller, + /// the implementer is free to exhibit any undesirable or incorrect behaviors, include panicking. + fn pick_next(&mut self) -> &Arc { + self.try_pick_next().unwrap() + } + + /// Tries to pick the next runnable task. + /// + /// This method instructs the local runqueue to pick the next runnable task on a best-effort basis. + /// If such a task can be picked, then this task supersedes the current task and + /// the new the method returns a reference to the new "current" task. + /// If the "old" current task presents, then it is still runnable and thus remains in the runqueue. + fn try_pick_next(&mut self) -> Option<&Arc>; /// Removes the current runnable task from runqueue. /// - /// This method returns the current runnable task. If there is no current runnable - /// task, this method returns `None`. + /// This method returns the current runnable task. + /// If there is no current runnable task, this method returns `None`. fn dequeue_current(&mut self) -> Option>; } @@ -102,6 +373,8 @@ pub enum UpdateFlags { Wait, /// Task yielding. Yield, + /// Task exiting. + Exit, } /// Preempts the current task. @@ -144,7 +417,7 @@ where current = local_rq.dequeue_current(); } - if let Some(next_task) = local_rq.pick_next_current() { + if let Some(next_task) = local_rq.try_pick_next() { if Arc::ptr_eq(current.as_ref().unwrap(), next_task) { return ReschedAction::DoNothing; } @@ -208,7 +481,7 @@ fn set_need_preempt(cpu_id: CpuId) { pub(super) fn exit_current() -> ! { reschedule(|local_rq: &mut dyn LocalRunQueue| { let _ = local_rq.dequeue_current(); - if let Some(next_task) = local_rq.pick_next_current() { + if let Some(next_task) = local_rq.try_pick_next() { ReschedAction::SwitchTo(next_task.clone()) } else { ReschedAction::Retry @@ -223,7 +496,7 @@ pub(super) fn exit_current() -> ! { pub(super) fn yield_now() { reschedule(|local_rq| { local_rq.update_current(UpdateFlags::Yield); - if let Some(next_task) = local_rq.pick_next_current() { + if let Some(next_task) = local_rq.try_pick_next() { ReschedAction::SwitchTo(next_task.clone()) } else { ReschedAction::DoNothing @@ -246,7 +519,7 @@ where let next_task = loop { let mut action = ReschedAction::DoNothing; - SCHEDULER.get().unwrap().local_mut_rq_with(&mut |rq| { + SCHEDULER.get().unwrap().mut_local_rq_with(&mut |rq| { action = f(rq); });