Fix races when finding the reaper

This commit is contained in:
Ruihan Li 2025-09-29 13:29:06 +08:00 committed by Jianfeng Jiang
parent 8be5465ace
commit 6336bb9fc8
3 changed files with 42 additions and 46 deletions

View File

@ -748,8 +748,8 @@ fn set_parent_and_group(clone_flags: CloneFlags, parent: &Arc<Process>, child: &
// Update `has_child_subreaper` for the child process to
// make sure the `has_child_subreaper` state of the child process
// will be consistent with its parent.
if real_parent.has_child_subreaper.load(Ordering::Relaxed) {
child.has_child_subreaper.store(true, Ordering::Relaxed);
if real_parent.has_child_subreaper.load(Ordering::Acquire) {
child.has_child_subreaper.store(true, Ordering::Release);
}
// Lock order: children of process -> process table

View File

@ -50,27 +50,32 @@ fn send_parent_death_signal(current_process: &Process) {
///
/// If there is no reaper process for `current_process`, returns `None`.
fn find_reaper_process(current_process: &Process) -> Option<Arc<Process>> {
let mut parent = current_process.parent().lock().process();
let mut parent = current_process.parent().lock().process().upgrade().unwrap();
while let Some(process) = parent.upgrade() {
if is_init_process(&process) {
return Some(process);
loop {
if parent.is_init_process() {
return Some(parent);
}
if !process.has_child_subreaper.load(Ordering::Acquire) {
if !parent.has_child_subreaper.load(Ordering::Acquire) {
return None;
}
let is_reaper = process.is_child_subreaper();
let is_zombie = process.status().is_zombie();
let is_reaper = parent.is_child_subreaper();
let is_zombie = parent.status().is_zombie();
if is_reaper && !is_zombie {
return Some(process);
return Some(parent);
}
parent = process.parent().lock().process();
let grandparent = parent.parent().lock().process().upgrade();
if let Some(grandparent) = grandparent {
parent = grandparent;
} else {
// If both the parent and grandparent have exited concurrently, we will lose the clue
// about the ancestor processes. Therefore, we have to retry.
parent = current_process.parent().lock().process().upgrade().unwrap();
}
}
None
}
/// Moves the children of `current_process` to be the children of `reaper_process`.
@ -80,17 +85,15 @@ fn move_process_children(
current_process: &Process,
reaper_process: &Arc<Process>,
) -> core::result::Result<(), ()> {
// Take the lock first to avoid the race when the `reaper_process` is exiting concurrently.
// Lock order: children of process -> parent of process
let mut reaper_process_children = reaper_process.children().lock();
let Some(reaper_process_children) = reaper_process_children.as_mut() else {
// The reaper process has exited, and it is not the init process
// (since we never clear the init process's children).
return Err(());
};
// Lock order: children of process -> parent of process
// We holds the lock of children while update the children's parents.
// We hold the lock of children while updating the children's parents.
// This ensures when dealing with CLONE_PARENT,
// the retrial will see an up-to-date real parent.
let mut current_children = current_process.children().lock();
@ -106,7 +109,7 @@ fn move_process_children(
/// Moves the children to a reaper process.
fn move_children_to_reaper_process(current_process: &Process) {
if is_init_process(current_process) {
if current_process.is_init_process() {
return;
}
@ -116,11 +119,10 @@ fn move_children_to_reaper_process(current_process: &Process) {
}
}
let Some(init_process) = get_init_process() else {
return;
};
const INIT_PROCESS_PID: Pid = 1;
let _ = move_process_children(current_process, &init_process);
let init_process = process_table::get_process(INIT_PROCESS_PID).unwrap();
move_process_children(current_process, &init_process).unwrap();
}
/// Sends a child-death signal to the parent.
@ -134,14 +136,3 @@ fn send_child_death_signal(current_process: &Process) {
};
parent.children_wait_queue().wake_all();
}
const INIT_PROCESS_PID: Pid = 1;
/// Gets the init process
fn get_init_process() -> Option<Arc<Process>> {
process_table::get_process(INIT_PROCESS_PID)
}
fn is_init_process(process: &Process) -> bool {
process.pid() == INIT_PROCESS_PID
}

View File

@ -308,7 +308,7 @@ impl Process {
}
pub fn is_init_process(&self) -> bool {
self.parent.lock().process().upgrade().is_none()
self.parent.pid() == 0
}
pub(super) fn children(&self) -> &Mutex<Option<BTreeMap<Pid, Arc<Process>>>> {
@ -748,9 +748,13 @@ impl Process {
// ******************* Subreaper ********************
/// Sets the child subreaper attribute of the current process.
///
/// # Panics
///
/// This method may panic if the process is a zombie process.
pub fn set_child_subreaper(&self) {
self.is_child_subreaper.store(true, Ordering::Release);
let has_child_subreaper = self.has_child_subreaper.fetch_or(true, Ordering::AcqRel);
self.is_child_subreaper.store(true, Ordering::Relaxed);
let has_child_subreaper = self.has_child_subreaper.fetch_or(true, Ordering::Release);
if !has_child_subreaper {
self.propagate_has_child_subreaper();
}
@ -758,37 +762,38 @@ impl Process {
/// Unsets the child subreaper attribute of the current process.
pub fn unset_child_subreaper(&self) {
self.is_child_subreaper.store(false, Ordering::Release);
self.is_child_subreaper.store(false, Ordering::Relaxed);
}
/// Returns whether this process is a child subreaper.
pub fn is_child_subreaper(&self) -> bool {
self.is_child_subreaper.load(Ordering::Acquire)
self.is_child_subreaper.load(Ordering::Relaxed)
}
/// Sets all descendants of the current process as having child subreaper.
fn propagate_has_child_subreaper(&self) {
let mut process_queue = VecDeque::new();
let children = self.children().lock();
let Some(children_ref) = children.as_ref() else {
// The current process is exiting group at the same time.
return;
};
for child_process in children_ref.values() {
if !child_process.has_child_subreaper.load(Ordering::Acquire) {
for child_process in children.as_ref().unwrap().values() {
let has_child_subreaper = child_process
.has_child_subreaper
.fetch_or(true, Ordering::Release);
if !has_child_subreaper {
process_queue.push_back(child_process.clone());
}
}
while let Some(process) = process_queue.pop_front() {
process.has_child_subreaper.store(true, Ordering::Release);
let children = process.children().lock();
let Some(children_ref) = children.as_ref() else {
// The process is exiting group at the same time.
continue;
};
for child_process in children_ref.values() {
if !child_process.has_child_subreaper.load(Ordering::Acquire) {
let has_child_subreaper = child_process
.has_child_subreaper
.fetch_or(true, Ordering::Release);
if !has_child_subreaper {
process_queue.push_back(child_process.clone());
}
}