diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index bfc65f049..5e8a542cf 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -105,7 +105,6 @@ fn init() { time::init(); net::init(); sched::init(); - syscall::init(); process::init(); fs::init(); } diff --git a/kernel/src/net/mod.rs b/kernel/src/net/mod.rs index cabea6831..146d9cad1 100644 --- a/kernel/src/net/mod.rs +++ b/kernel/src/net/mod.rs @@ -2,6 +2,9 @@ pub mod iface; pub mod socket; +mod uts_ns; + +pub use uts_ns::UtsNamespace; pub fn init() { iface::init(); diff --git a/kernel/src/net/uts_ns.rs b/kernel/src/net/uts_ns.rs new file mode 100644 index 000000000..3a9217854 --- /dev/null +++ b/kernel/src/net/uts_ns.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: MPL-2.0 + +use spin::Once; + +use crate::{ + prelude::*, + process::{credentials::capabilities::CapSet, posix_thread::PosixThread, UserNamespace}, + util::padded, +}; + +/// The UTS namespace. +pub struct UtsNamespace { + uts_name: UtsName, + owner: Arc, +} + +impl UtsNamespace { + /// Returns a reference to the singleton initial UTS namespace. + pub fn get_init_singleton() -> &'static Arc { + static INIT: Once> = Once::new(); + + INIT.call_once(|| { + // We intentionally report Linux-like UTS values instead of Asterinas' real + // name and version. These spoofed values satisfy glibc, which inspects + // uname fields (sysname, release, version, etc.) and expects Linux-compatible data. + let uts_name = UtsName { + sysname: padded(b"Linux"), + nodename: padded(b"WHITLEY"), + release: padded(b"5.13.0"), + version: padded(b"5.13.0"), + machine: padded(b"x86_64"), + domainname: padded(b""), + }; + + let owner = UserNamespace::get_init_singleton().clone(); + + Arc::new(Self { uts_name, owner }) + }) + } + + /// Clones a new UTS namespace from `self`. 
+ pub fn new_clone( + &self, + owner: Arc, + posix_thread: &PosixThread, + ) -> Result> { + owner.check_cap(CapSet::SYS_ADMIN, posix_thread)?; + Ok(Arc::new(Self { + uts_name: self.uts_name, + owner, + })) + } + + /// Returns the owner user namespace of the namespace. + pub fn owner_ns(&self) -> &Arc { + &self.owner + } + + /// Returns the UTS name. + pub fn uts_name(&self) -> &UtsName { + &self.uts_name + } +} + +const UTS_FIELD_LEN: usize = 65; + +#[derive(Debug, Clone, Copy, Pod)] +#[repr(C)] +pub struct UtsName { + sysname: [u8; UTS_FIELD_LEN], + nodename: [u8; UTS_FIELD_LEN], + release: [u8; UTS_FIELD_LEN], + version: [u8; UTS_FIELD_LEN], + machine: [u8; UTS_FIELD_LEN], + domainname: [u8; UTS_FIELD_LEN], +} diff --git a/kernel/src/process/clone.rs b/kernel/src/process/clone.rs index afc9633cf..120d66218 100644 --- a/kernel/src/process/clone.rs +++ b/kernel/src/process/clone.rs @@ -20,7 +20,11 @@ use crate::{ thread_info::ThreadFsInfo, }, prelude::*, - process::{pid_file::PidFile, posix_thread::allocate_posix_tid}, + process::{ + pid_file::PidFile, + posix_thread::{allocate_posix_tid, PosixThread, ThreadLocal}, + NsProxy, UserNamespace, + }, sched::Nice, thread::{AsThread, Tid}, }; @@ -28,6 +32,7 @@ use crate::{ bitflags! { #[derive(Default)] pub struct CloneFlags: u32 { + const CLONE_NEWTIME = 0x00000080; /* New time namespace */ const CLONE_VM = 0x00000100; /* Set if VM shared between processes. */ const CLONE_FS = 0x00000200; /* Set if fs info shared between processes. */ const CLONE_FILES = 0x00000400; /* Set if open files shared between processes. */ @@ -52,6 +57,16 @@ bitflags! { const CLONE_NEWPID = 0x20000000; /* New pid namespace. */ const CLONE_NEWNET = 0x40000000; /* New network namespace. */ const CLONE_IO = 0x80000000; /* Clone I/O context. */ + + /// A bitmask of all `CloneFlags` related to namespace creation. 
+ const CLONE_NS_FLAGS = Self::CLONE_NEWTIME.bits() | + Self::CLONE_NEWNS.bits() | + Self::CLONE_NEWCGROUP.bits() | + Self::CLONE_NEWUTS.bits() | + Self::CLONE_NEWIPC.bits() | + Self::CLONE_NEWUSER.bits() | + Self::CLONE_NEWPID.bits() | + Self::CLONE_NEWNET.bits(); } } @@ -241,6 +256,13 @@ fn clone_child_task( ); } + if clone_flags.contains(CloneFlags::CLONE_NEWUSER) { + return_errno_with_message!( + Errno::EINVAL, + "`CLONE_THREAD` cannot be used together with `CLONE_NEWUSER`" + ) + } + let Context { process, thread_local, @@ -260,6 +282,15 @@ fn clone_child_task( // Clone FPU context let child_fpu_context = thread_local.fpu().clone_context(); + // Clone namespaces + let child_user_ns = thread_local.borrow_user_ns().clone(); + let child_ns_proxy = clone_ns_proxy( + thread_local.borrow_ns_proxy().unwrap(), + &child_user_ns, + clone_flags, + posix_thread, + )?; + let child_user_ctx = Box::new(clone_user_ctx( parent_context, clone_args.stack, @@ -287,7 +318,9 @@ fn clone_child_task( .sig_mask(sig_mask) .file_table(child_file_table) .fs(child_fs) - .fpu_context(child_fpu_context); + .fpu_context(child_fpu_context) + .user_ns(child_user_ns) + .ns_proxy(child_ns_proxy); // Deal with SETTID/CLEARTID flags clone_parent_settid(child_tid, clone_args.parent_tid, clone_flags)?; @@ -350,6 +383,15 @@ fn clone_child_process( // Clone FPU context let child_fpu_context = thread_local.fpu().clone_context(); + // Clone the namespaces + let child_user_ns = clone_user_ns(clone_flags, thread_local)?; + let child_ns_proxy = clone_ns_proxy( + thread_local.borrow_ns_proxy().unwrap(), + &child_user_ns, + clone_flags, + posix_thread, + )?; + // Inherit the parent's signal mask let child_sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into(); @@ -377,6 +419,8 @@ fn clone_child_process( .file_table(child_file_table) .fs(child_fs) .fpu_context(child_fpu_context) + .user_ns(child_user_ns.clone()) + .ns_proxy(child_ns_proxy) }; // Deal with SETTID/CLEARTID flags @@ -394,6 +438,7 @@ fn 
clone_child_process( child_resource_limits, child_nice, child_sig_dispositions, + child_user_ns, child_thread_builder, ) }; @@ -568,6 +613,29 @@ fn clone_pidfd( } } +fn clone_user_ns( + clone_flags: CloneFlags, + thread_local: &ThreadLocal, +) -> Result> { + if clone_flags.contains(CloneFlags::CLONE_NEWUSER) { + return_errno_with_message!( + Errno::EINVAL, + "cloning a new user namespace is not supported" + ); + } else { + Ok(thread_local.borrow_user_ns().clone()) + } +} + +fn clone_ns_proxy( + parent_ns_proxy: &Arc, + user_ns: &Arc, + clone_flags: CloneFlags, + posix_thread: &PosixThread, +) -> Result> { + parent_ns_proxy.new_clone(user_ns, clone_flags, posix_thread) +} + #[expect(clippy::too_many_arguments)] fn create_child_process( pid: Pid, @@ -577,6 +645,7 @@ fn create_child_process( resource_limits: ResourceLimits, nice: Nice, sig_dispositions: Arc>, + user_ns: Arc, thread_builder: PosixThreadBuilder, ) -> Arc { let child_proc = Process::new( @@ -587,6 +656,7 @@ fn create_child_process( resource_limits, nice, sig_dispositions, + user_ns, ); let child_task = thread_builder.process(Arc::downgrade(&child_proc)).build(); diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index c81e20a31..71945082a 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -4,6 +4,7 @@ mod clone; pub mod credentials; mod exit; mod kill; +mod namespace; mod pid_file; pub mod posix_thread; #[expect(clippy::module_inception)] @@ -23,6 +24,10 @@ mod wait; pub use clone::{clone_child, CloneArgs, CloneFlags}; pub use credentials::{Credentials, Gid, Uid}; pub use kill::{kill, kill_all, kill_group, tgkill}; +pub use namespace::{ + nsproxy::{check_unsupported_ns_flags, ContextNsAdminApi, NsProxy, NsProxyBuilder}, + user_ns::UserNamespace, +}; pub use pid_file::PidFile; pub use process::{ broadcast_signal_async, enqueue_signal_async, spawn_init_process, ExitCode, JobControl, Pgid, diff --git a/kernel/src/process/namespace/mod.rs 
b/kernel/src/process/namespace/mod.rs new file mode 100644 index 000000000..db24e027c --- /dev/null +++ b/kernel/src/process/namespace/mod.rs @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: MPL-2.0 + +pub(super) mod nsproxy; +pub(super) mod user_ns; diff --git a/kernel/src/process/namespace/nsproxy.rs b/kernel/src/process/namespace/nsproxy.rs new file mode 100644 index 000000000..034cde294 --- /dev/null +++ b/kernel/src/process/namespace/nsproxy.rs @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MPL-2.0 + +use spin::Once; + +use crate::{ + net::UtsNamespace, + prelude::*, + process::{posix_thread::PosixThread, CloneFlags, UserNamespace}, +}; + +/// A struct that acts as a per-thread proxy to give access to most namespaces. +/// +/// Each `PosixThread` owns an instance of `NsProxy` +/// and keeps a local copy in `ThreadLocal` for fast access. +/// `NsProxy` contains all types of namespaces except +/// 1. The user namespace, which is included in the `Process` struct. +/// 2. The PID namespace, which is included in the `Process` struct (TODO). +pub struct NsProxy { + uts_ns: Arc, +} + +impl NsProxy { + /// Returns a reference to the singleton initial `NsProxy`. + pub(in crate::process) fn get_init_singleton() -> &'static Arc { + static INIT: Once> = Once::new(); + INIT.call_once(|| { + Arc::new(NsProxy { + uts_ns: UtsNamespace::get_init_singleton().clone(), + }) + }) + } + + /// Creates a new `NsProxy` by cloning from an existing `NsProxy`. + /// + /// If no namespaces need to be cloned, this method simply clones `self` and returns. + /// Otherwise, a new `NsProxy` will be created + /// by selectively cloning fields from the proxy and newly created namespaces. + // + // FIXME: This method is currently used by both `unshare()` and `clone()`. + // Once we support PID and time namespaces, their semantics diverge. + // We will need to refactor (or split) this method accordingly. 
+ pub(in crate::process) fn new_clone( + self: &Arc, + user_ns: &Arc, + clone_flags: CloneFlags, + posix_thread: &PosixThread, + ) -> Result> { + let clone_ns_flags = (clone_flags & CloneFlags::CLONE_NS_FLAGS) - CloneFlags::CLONE_NEWUSER; + + // Fast path: If there are no new namespaces to clone, + // we can directly clone the proxy and return. + if clone_ns_flags.is_empty() { + return Ok(self.clone()); + } + + // Slow path: One or more namespaces need to be cloned, + // so a new `NsProxy` must be created. + + check_unsupported_ns_flags(clone_ns_flags)?; + + let mut builder = NsProxyBuilder::new(self); + + if clone_ns_flags.contains(CloneFlags::CLONE_NEWUTS) { + let uts_ns = self.uts_ns.new_clone(user_ns.clone(), posix_thread)?; + builder.uts_ns(uts_ns); + } + + // TODO: Support other namespaces. + + Ok(Arc::new(builder.build())) + } + + /// Returns the associated UTS namespace. + pub fn uts_ns(&self) -> &Arc { + &self.uts_ns + } +} + +/// A builder for creating a new `NsProxy` by selectively cloning namespaces +/// from an existing one. +pub struct NsProxyBuilder<'a> { + old_proxy: &'a NsProxy, + + // Fields for new namespaces. + uts_ns: Option>, +} + +impl<'a> NsProxyBuilder<'a> { + /// Creates a builder based on an existing `NsProxy`. + pub fn new(old_proxy: &'a NsProxy) -> Self { + Self { + old_proxy, + uts_ns: None, + } + } + + /// Sets the new UTS namespace. + pub fn uts_ns(&mut self, uts_ns: Arc) -> &mut Self { + self.uts_ns = Some(uts_ns); + self + } + + /// Builds the new `NsProxy`. + pub fn build(self) -> NsProxy { + let Self { + old_proxy, + uts_ns: new_uts, + } = self; + + let new_uts = new_uts.unwrap_or_else(|| old_proxy.uts_ns.clone()); + + NsProxy { uts_ns: new_uts } + } +} + +/// Checks if the given `flags` contain any unsupported namespace-related flags. +/// +/// This method does _not_ check CLONE_NEWUSER since it's handled separately. 
+pub fn check_unsupported_ns_flags(flags: CloneFlags) -> Result<()> { + const SUPPORTED_FLAGS: CloneFlags = CloneFlags::CLONE_NEWUTS; + + let unsupported_flags = + (flags & CloneFlags::CLONE_NS_FLAGS) - SUPPORTED_FLAGS - CloneFlags::CLONE_NEWUSER; + if unsupported_flags.is_empty() { + return Ok(()); + } + + warn!("unsupported clone ns flags: {:?}", unsupported_flags); + return_errno_with_message!(Errno::EINVAL, "unsupported clone namespace flags"); +} diff --git a/kernel/src/process/namespace/user_ns.rs b/kernel/src/process/namespace/user_ns.rs new file mode 100644 index 000000000..623556352 --- /dev/null +++ b/kernel/src/process/namespace/user_ns.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MPL-2.0 + +use spin::Once; + +use crate::{ + prelude::*, + process::{credentials::capabilities::CapSet, posix_thread::PosixThread}, +}; + +/// The user namespace. +pub struct UserNamespace { + _private: (), +} + +impl UserNamespace { + /// Returns a reference to the singleton initial user namespace. + pub fn get_init_singleton() -> &'static Arc { + static INIT: Once> = Once::new(); + + INIT.call_once(|| Arc::new(UserNamespace { _private: () })) + } + + /// Checks whether the thread has the required capability in this user namespace. + pub fn check_cap(&self, required: CapSet, posix_thread: &PosixThread) -> Result<()> { + // Since creating new user namespaces is not supported at the moment, + // there is effectively only one user namespace in the entire system. + // Therefore, the thread has a single set of capabilities used for permission checks. + // FIXME: Once support for creating new user namespaces is added, + // we should verify the thread's capabilities within the relevant user namespace. 
+ let cap_set = posix_thread.credentials().effective_capset(); + if cap_set.contains(required) { + return Ok(()); + } + + return_errno_with_message!( + Errno::EPERM, + "the thread does not have the required capability" + ) + } +} diff --git a/kernel/src/process/posix_thread/builder.rs b/kernel/src/process/posix_thread/builder.rs index dd701d4bd..21831381a 100644 --- a/kernel/src/process/posix_thread/builder.rs +++ b/kernel/src/process/posix_thread/builder.rs @@ -18,7 +18,7 @@ use crate::{ process::{ posix_thread::name::ThreadName, signal::{sig_mask::AtomicSigMask, sig_queues::SigQueues}, - Credentials, Process, + Credentials, NsProxy, Process, UserNamespace, }, sched::{Nice, SchedPolicy}, thread::{task, Thread, Tid}, @@ -43,6 +43,8 @@ pub struct PosixThreadBuilder { sig_queues: SigQueues, sched_policy: SchedPolicy, fpu_context: FpuContext, + user_ns: Option>, + ns_proxy: Option>, is_init_process: bool, } @@ -63,6 +65,8 @@ impl PosixThreadBuilder { sched_policy: SchedPolicy::Fair(Nice::default()), fpu_context: FpuContext::new(), is_init_process: false, + user_ns: None, + ns_proxy: None, } } @@ -106,6 +110,16 @@ impl PosixThreadBuilder { self } + pub fn user_ns(mut self, user_ns: Arc) -> Self { + self.user_ns = Some(user_ns); + self + } + + pub fn ns_proxy(mut self, ns_proxy: Arc) -> Self { + self.ns_proxy = Some(ns_proxy); + self + } + #[expect(clippy::wrong_self_convention)] pub(in crate::process) fn is_init_process(mut self) -> Self { self.is_init_process = true; @@ -127,6 +141,8 @@ impl PosixThreadBuilder { sig_queues, sched_policy, fpu_context, + user_ns, + ns_proxy, is_init_process, } = self; @@ -134,6 +150,10 @@ impl PosixThreadBuilder { let fs = fs.unwrap_or_else(|| Arc::new(ThreadFsInfo::default())); + assert_eq!(user_ns.is_none(), ns_proxy.is_none()); + let user_ns = user_ns.unwrap_or_else(|| UserNamespace::get_init_singleton().clone()); + let ns_proxy = ns_proxy.unwrap_or_else(|| NsProxy::get_init_singleton().clone()); + let root_vmar = process .upgrade() 
.unwrap() @@ -161,6 +181,7 @@ impl PosixThreadBuilder { virtual_timer_manager, prof_timer_manager, io_priority: AtomicU32::new(0), + ns_proxy: Mutex::new(Some(ns_proxy.clone())), } }; @@ -179,6 +200,8 @@ impl PosixThreadBuilder { file_table, fs, fpu_context, + user_ns, + ns_proxy, ); thread_table::add_thread(tid, thread.clone()); diff --git a/kernel/src/process/posix_thread/exit.rs b/kernel/src/process/posix_thread/exit.rs index 975e3f972..5178b3ea4 100644 --- a/kernel/src/process/posix_thread/exit.rs +++ b/kernel/src/process/posix_thread/exit.rs @@ -81,10 +81,12 @@ fn exit_internal(term_status: TermStatus, is_exiting_group: bool) { // Drop fields in `PosixThread`. *posix_thread.file_table().lock() = None; + *posix_thread.ns_proxy().lock() = None; // Drop fields in `ThreadLocal`. *thread_local.root_vmar().borrow_mut() = None; thread_local.borrow_file_table_mut().remove(); + thread_local.borrow_ns_proxy_mut().remove(); if is_last_thread { exit_process(&posix_process); diff --git a/kernel/src/process/posix_thread/mod.rs b/kernel/src/process/posix_thread/mod.rs index 45b127995..8237fa02d 100644 --- a/kernel/src/process/posix_thread/mod.rs +++ b/kernel/src/process/posix_thread/mod.rs @@ -21,7 +21,7 @@ use crate::{ events::Observer, fs::file_table::FileTable, prelude::*, - process::signal::constants::SIGCONT, + process::{namespace::nsproxy::NsProxy, signal::constants::SIGCONT}, thread::{Thread, Tid}, time::{clocks::ProfClock, Timer, TimerManager}, }; @@ -77,6 +77,9 @@ pub struct PosixThread { /// I/O Scheduling priority value io_priority: AtomicU32, + + /// The namespaces that the thread belongs to. + ns_proxy: Mutex>>, } impl PosixThread { @@ -310,6 +313,11 @@ impl PosixThread { pub fn io_priority(&self) -> &AtomicU32 { &self.io_priority } + + /// Returns the namespaces which the thread belongs to. 
+ pub fn ns_proxy(&self) -> &Mutex>> { + &self.ns_proxy + } } static POSIX_TID_ALLOCATOR: AtomicU32 = AtomicU32::new(1); diff --git a/kernel/src/process/posix_thread/thread_local.rs b/kernel/src/process/posix_thread/thread_local.rs index 1d6aeac98..19e4cf5b2 100644 --- a/kernel/src/process/posix_thread/thread_local.rs +++ b/kernel/src/process/posix_thread/thread_local.rs @@ -9,7 +9,7 @@ use super::RobustListHead; use crate::{ fs::{file_table::FileTable, thread_info::ThreadFsInfo}, prelude::*, - process::signal::SigStack, + process::{signal::SigStack, NsProxy, UserNamespace}, vm::vmar::Vmar, }; @@ -45,9 +45,14 @@ pub struct ThreadLocal { sig_context: Cell>, /// Stack address, size, and flags for the signal handler. sig_stack: RefCell, + + // Namespaces. + user_ns: RefCell>, + ns_proxy: RefCell>>, } impl ThreadLocal { + #[expect(clippy::too_many_arguments)] pub(super) fn new( set_child_tid: Vaddr, clear_child_tid: Vaddr, @@ -55,6 +60,8 @@ impl ThreadLocal { file_table: RwArc, fs: Arc, fpu_context: FpuContext, + user_ns: Arc, + ns_proxy: Arc, ) -> Self { Self { set_child_tid: Cell::new(set_child_tid), @@ -68,6 +75,8 @@ impl ThreadLocal { sig_stack: RefCell::new(SigStack::default()), fpu_context: RefCell::new(fpu_context), fpu_state: Cell::new(FpuState::Unloaded), + user_ns: RefCell::new(user_ns), + ns_proxy: RefCell::new(Some(ns_proxy)), } } @@ -127,19 +136,18 @@ impl ThreadLocal { } pub fn borrow_file_table(&self) -> FileTableRef { - FileTableRef(self.file_table.borrow()) + ThreadLocalOptionRef(self.file_table.borrow()) } pub fn borrow_file_table_mut(&self) -> FileTableRefMut { - FileTableRefMut(self.file_table.borrow_mut()) + ThreadLocalOptionRefMut(self.file_table.borrow_mut()) } pub fn borrow_fs(&self) -> Ref<'_, Arc> { self.fs.borrow() } - #[expect(dead_code)] - pub fn borrow_fs_mut(&self) -> RefMut<'_, Arc> { + pub(in crate::process) fn borrow_fs_mut(&self) -> RefMut<'_, Arc> { self.fs.borrow_mut() } @@ -154,6 +162,18 @@ impl ThreadLocal { pub fn fpu(&self) -> 
ThreadFpu<'_> { ThreadFpu(self) } + + pub fn borrow_user_ns(&self) -> Ref<'_, Arc> { + self.user_ns.borrow() + } + + pub fn borrow_ns_proxy(&self) -> NsProxyRef { + ThreadLocalOptionRef(self.ns_proxy.borrow()) + } + + pub(in crate::process) fn borrow_ns_proxy_mut(&self) -> NsProxyRefMut { + ThreadLocalOptionRefMut(self.ns_proxy.borrow_mut()) + } } /// The current state of `ThreadFpu`. @@ -243,40 +263,52 @@ impl ThreadFpu<'_> { } /// An immutable, shared reference to the file table in [`ThreadLocal`]. -pub struct FileTableRef<'a>(Ref<'a, Option>>); +pub type FileTableRef<'a> = ThreadLocalOptionRef<'a, RwArc>; -impl FileTableRef<'_> { - /// Unwraps and returns a reference to the file table. +/// An immutable, shared reference to the `NsProxy` in [`ThreadLocal`]. +pub type NsProxyRef<'a> = ThreadLocalOptionRef<'a, Arc>; + +/// An immutable, shared reference to thread-local data contained within a `RefCell>`. +pub struct ThreadLocalOptionRef<'a, T>(Ref<'a, Option>); + +impl ThreadLocalOptionRef<'_, T> { + /// Unwraps and returns a reference to the data. /// /// # Panics /// - /// This method will panic if the thread has exited and the file table has been dropped. - pub fn unwrap(&self) -> &RwArc { + /// This method will panic if the thread has exited and the data has been dropped. + pub fn unwrap(&self) -> &T { self.0.as_ref().unwrap() } } /// A mutable, exclusive reference to the file table in [`ThreadLocal`]. -pub struct FileTableRefMut<'a>(RefMut<'a, Option>>); +pub type FileTableRefMut<'a> = ThreadLocalOptionRefMut<'a, RwArc>; -impl FileTableRefMut<'_> { - /// Unwraps and returns a reference to the file table. +/// A mutable, exclusive reference to the `NsProxy` in [`ThreadLocal`]. +pub(in crate::process) type NsProxyRefMut<'a> = ThreadLocalOptionRefMut<'a, Arc>; + +/// A mutable, exclusive reference to thread-local data contained within a `RefCell>`. 
+pub struct ThreadLocalOptionRefMut<'a, T>(RefMut<'a, Option>); + +impl ThreadLocalOptionRefMut<'_, T> { + /// Unwraps and returns a reference to the data. /// /// # Panics /// - /// This method will panic if the thread has exited and the file table has been dropped. - pub fn unwrap(&mut self) -> &mut RwArc { + /// This method will panic if the thread has exited and the data has been dropped. + pub fn unwrap(&mut self) -> &mut T { self.0.as_mut().unwrap() } - /// Removes the file table and drops it. + /// Removes the data and drops it. pub(super) fn remove(&mut self) { *self.0 = None; } - /// Replaces the file table with a new one, returning the old one. - pub fn replace(&mut self, new_table: Option>) -> Option> { - core::mem::replace(&mut *self.0, new_table) + /// Replaces the data with a new one, returning the old one. + pub(in crate::process) fn replace(&mut self, new: Option) -> Option { + core::mem::replace(&mut *self.0, new) } } diff --git a/kernel/src/process/process/init_proc.rs b/kernel/src/process/process/init_proc.rs index 19e0a7d72..6052c3e64 100644 --- a/kernel/src/process/process/init_proc.rs +++ b/kernel/src/process/process/init_proc.rs @@ -17,7 +17,7 @@ use crate::{ process_vm::ProcessVm, rlimit::ResourceLimits, signal::sig_disposition::SigDispositions, - Credentials, ProgramToLoad, + Credentials, ProgramToLoad, UserNamespace, }, sched::Nice, thread::Tid, @@ -52,6 +52,7 @@ fn create_init_process( let resource_limits = ResourceLimits::default(); let nice = Nice::default(); let sig_dispositions = Arc::new(Mutex::new(SigDispositions::default())); + let user_ns = UserNamespace::get_init_singleton().clone(); let init_proc = Process::new( pid, @@ -61,6 +62,7 @@ fn create_init_process( resource_limits, nice, sig_dispositions, + user_ns, ); let init_task = create_init_task( diff --git a/kernel/src/process/process/mod.rs b/kernel/src/process/process/mod.rs index ed23a73a5..da310fd15 100644 --- a/kernel/src/process/process/mod.rs +++ 
b/kernel/src/process/process/mod.rs @@ -18,7 +18,7 @@ use super::{ }; use crate::{ prelude::*, - process::{signal::Pollee, status::StopWaitStatus, WaitOptions}, + process::{signal::Pollee, status::StopWaitStatus, UserNamespace, WaitOptions}, sched::{AtomicNice, Nice}, thread::{AsThread, Thread}, time::clocks::ProfClock, @@ -122,6 +122,11 @@ pub struct Process { /// A manager that manages timer resources and utilities of the process. timer_manager: PosixTimerManager, + + // Namespaces + /// The user namespace + #[expect(dead_code)] + user_ns: Mutex>, } /// Representing a parent process by holding a weak reference to it and its PID. @@ -187,6 +192,7 @@ impl Process { Some(Task::current()?.as_posix_thread()?.process()) } + #[expect(clippy::too_many_arguments)] pub(super) fn new( pid: Pid, parent: Weak, @@ -196,6 +202,7 @@ impl Process { resource_limits: ResourceLimits, nice: Nice, sig_dispositions: Arc>, + user_ns: Arc, ) -> Arc { // SIGCHID does not interrupt pauser. Child process will // resume paused parent when doing exit. @@ -223,6 +230,7 @@ impl Process { nice: AtomicNice::new(nice), timer_manager: PosixTimerManager::new(&prof_clock, process_ref), prof_clock, + user_ns: Mutex::new(user_ns), }) } diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 4d8383fb8..aa1646c75 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -401,7 +401,3 @@ macro_rules! log_syscall_entry { } }; } - -pub(super) fn init() { - uname::init(); -} diff --git a/kernel/src/syscall/uname.rs b/kernel/src/syscall/uname.rs index f84a79e5e..40f7d0437 100644 --- a/kernel/src/syscall/uname.rs +++ b/kernel/src/syscall/uname.rs @@ -1,62 +1,11 @@ // SPDX-License-Identifier: MPL-2.0 - -use spin::Once; - use super::SyscallReturn; use crate::prelude::*; -// We don't use the real name and version of our os here. Instead, we pick up fake values witch is the same as the ones of linux. 
-// The values are used to fool glibc since glibc will check the version and os name. -static UTS_NAME: Once = Once::new(); - -const UTS_FIELD_LEN: usize = 65; - -#[derive(Debug, Clone, Copy, Pod)] -#[repr(C)] -struct UtsName { - sysname: [u8; UTS_FIELD_LEN], - nodename: [u8; UTS_FIELD_LEN], - release: [u8; UTS_FIELD_LEN], - version: [u8; UTS_FIELD_LEN], - machine: [u8; UTS_FIELD_LEN], - domainname: [u8; UTS_FIELD_LEN], -} - -impl UtsName { - const fn new() -> Self { - UtsName { - sysname: [0; UTS_FIELD_LEN], - nodename: [0; UTS_FIELD_LEN], - release: [0; UTS_FIELD_LEN], - version: [0; UTS_FIELD_LEN], - machine: [0; UTS_FIELD_LEN], - domainname: [0; UTS_FIELD_LEN], - } - } -} - -pub(super) fn init() { - UTS_NAME.call_once(|| { - let copy_slice = |src: &[u8], dst: &mut [u8]| { - let len = src.len().min(dst.len()); - dst[..len].copy_from_slice(&src[..len]); - }; - - let mut uts_name = UtsName::new(); - copy_slice(b"Linux", &mut uts_name.sysname); - copy_slice(b"WHITLEY", &mut uts_name.nodename); - copy_slice(b"5.13.0", &mut uts_name.release); - copy_slice(b"5.13.0", &mut uts_name.version); - copy_slice(b"x86_64", &mut uts_name.machine); - copy_slice(b"", &mut uts_name.domainname); - - uts_name - }); -} - pub fn sys_uname(old_uname_addr: Vaddr, ctx: &Context) -> Result { debug!("old uname addr = 0x{:x}", old_uname_addr); - ctx.user_space() - .write_val(old_uname_addr, UTS_NAME.get().unwrap())?; + let ns_proxy = ctx.thread_local.borrow_ns_proxy(); + let uts_name = ns_proxy.unwrap().uts_ns().uts_name(); + ctx.user_space().write_val(old_uname_addr, uts_name)?; Ok(SyscallReturn::Return(0)) } diff --git a/kernel/src/util/mod.rs b/kernel/src/util/mod.rs index 8520b9a98..8faccfc74 100644 --- a/kernel/src/util/mod.rs +++ b/kernel/src/util/mod.rs @@ -2,8 +2,10 @@ mod iovec; pub mod net; +mod padded; pub mod per_cpu_counter; pub mod random; pub mod ring_buffer; pub use iovec::{MultiRead, MultiWrite, VmReaderArray, VmWriterArray}; +pub use padded::padded; diff --git 
a/kernel/src/util/padded.rs b/kernel/src/util/padded.rs new file mode 100644 index 000000000..fc36fabd5 --- /dev/null +++ b/kernel/src/util/padded.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MPL-2.0 + +/// Creates a fixed-size byte array out of a byte slice. +/// +/// # Example +/// +/// ``` +/// let fixed_size_text: [u8; 128] = padded(b"Hello World"); +/// ``` +/// +/// Without this `padded` utility function, +/// one would have to write a lengthier and less efficient version. +/// +/// ``` +/// let fixed_size_text: [u8; 128] = { +/// const HELLO: &[u8] = b"Hello World"; +/// let mut buf = [0u8; 128]; +/// buf[..HELLO.len()].copy_from_slice(HELLO); +/// buf +/// }; +/// ``` +pub const fn padded(s: &[u8]) -> [u8; N] { + let mut out = [0u8; N]; + let mut i = 0; + while i < s.len() && i < N { + out[i] = s[i]; + i += 1; + } + out +}