From 65aa156e92595a5280a8c8ff7799830c5b40cbbf Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Sat, 27 Jul 2024 11:21:26 +0800 Subject: [PATCH] Implement UNIX abstract address namespace --- kernel/src/net/socket/unix/addr.rs | 68 +++++++++- kernel/src/net/socket/unix/mod.rs | 1 + kernel/src/net/socket/unix/ns/abs.rs | 122 ++++++++++++++++++ kernel/src/net/socket/unix/ns/mod.rs | 9 ++ kernel/src/net/socket/unix/ns/path.rs | 57 ++++++++ kernel/src/net/socket/unix/stream/init.rs | 30 +---- kernel/src/net/socket/unix/stream/listener.rs | 65 ++++------ kernel/src/net/socket/unix/stream/socket.rs | 41 +----- test/apps/network/unix_err.c | 61 +++++++++ 9 files changed, 338 insertions(+), 116 deletions(-) create mode 100644 kernel/src/net/socket/unix/ns/abs.rs create mode 100644 kernel/src/net/socket/unix/ns/mod.rs create mode 100644 kernel/src/net/socket/unix/ns/path.rs diff --git a/kernel/src/net/socket/unix/addr.rs b/kernel/src/net/socket/unix/addr.rs index 7e6571ffc..44ae91613 100644 --- a/kernel/src/net/socket/unix/addr.rs +++ b/kernel/src/net/socket/unix/addr.rs @@ -1,6 +1,13 @@ // SPDX-License-Identifier: MPL-2.0 -use crate::{fs::path::Dentry, net::socket::util::socket_addr::SocketAddr, prelude::*}; +use keyable_arc::KeyableArc; + +use super::ns::{self, AbstractHandle}; +use crate::{ + fs::{path::Dentry, utils::Inode}, + net::socket::util::socket_addr::SocketAddr, + prelude::*, +}; #[derive(Clone, Debug, PartialEq, Eq)] pub enum UnixSocketAddr { @@ -9,10 +16,36 @@ pub enum UnixSocketAddr { Abstract(Arc<[u8]>), } -#[derive(Clone, Debug)] -pub(super) enum UnixSocketAddrBound { - Path(Arc, Arc), - Abstract(Arc<[u8]>), +impl UnixSocketAddr { + pub(super) fn bind(self) -> Result { + let bound = match self { + Self::Unnamed => UnixSocketAddrBound::Abstract(ns::alloc_ephemeral_abstract_name()?), + Self::Path(path) => { + let dentry = ns::create_socket_file(&path)?; + UnixSocketAddrBound::Path(path, dentry) + } + Self::Abstract(name) => UnixSocketAddrBound::Abstract(ns::create_abstract_name(name)?), + }; + + Ok(bound) + } + + pub(super) fn connect(&self) -> Result { + let bound = match self { + Self::Unnamed => return_errno_with_message!( + Errno::EINVAL, + "the unnamed UNIX domain socket address is not valid for connecting" + ), + Self::Path(path) => UnixSocketAddrKey::Path(KeyableArc::from( + ns::lookup_socket_file(path)?.inode().clone(), + )), + Self::Abstract(name) => { + UnixSocketAddrKey::Abstract(KeyableArc::from(ns::lookup_abstract_name(name)?)) + } + }; + + Ok(bound) + } } impl TryFrom for UnixSocketAddr { @@ -26,11 +59,34 @@ impl TryFrom for UnixSocketAddr { } } +#[derive(Clone, Debug)] +pub(super) enum UnixSocketAddrBound { + Path(Arc, Arc), + Abstract(Arc), +} + +#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub(super) enum UnixSocketAddrKey { + Path(KeyableArc), + Abstract(KeyableArc), +} + +impl UnixSocketAddrBound { + pub(super) fn to_key(&self) -> UnixSocketAddrKey { + match self { + Self::Path(_, dentry) => { + UnixSocketAddrKey::Path(KeyableArc::from(dentry.inode().clone())) + } + Self::Abstract(handle) => UnixSocketAddrKey::Abstract(KeyableArc::from(handle.clone())), + } + } +} + impl From for UnixSocketAddr { fn from(value: UnixSocketAddrBound) -> Self { match value { UnixSocketAddrBound::Path(path, _) => Self::Path(path), - UnixSocketAddrBound::Abstract(name) => Self::Abstract(name), + UnixSocketAddrBound::Abstract(name) => Self::Abstract(name.name()), } } } diff --git a/kernel/src/net/socket/unix/mod.rs b/kernel/src/net/socket/unix/mod.rs index 698dbf54a..e52662194 100644 --- a/kernel/src/net/socket/unix/mod.rs +++ b/kernel/src/net/socket/unix/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 mod addr; +mod ns; mod stream; pub use addr::UnixSocketAddr; diff --git a/kernel/src/net/socket/unix/ns/abs.rs b/kernel/src/net/socket/unix/ns/abs.rs new file mode 100644 index 000000000..909c625da --- /dev/null +++ b/kernel/src/net/socket/unix/ns/abs.rs @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MPL-2.0 + +use alloc::{collections::btree_map::Entry, format}; + +use keyable_arc::KeyableArc; + +use crate::prelude::*; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct AbstractHandle(KeyableArc<[u8]>); + +impl AbstractHandle { + fn new(name: Arc<[u8]>) -> Self { + Self(KeyableArc::from(name)) + } + + pub fn name(&self) -> Arc<[u8]> { + self.0.clone().into() + } +} + +impl Drop for AbstractHandle { + fn drop(&mut self) { + HANDLE_TABLE.remove(self.name()); + } +} + +static HANDLE_TABLE: HandleTable = HandleTable::new(); + +struct HandleTable { + handles: RwLock, Weak>>, +} + +impl HandleTable { + const fn new() -> Self { + Self { + handles: RwLock::new(BTreeMap::new()), + } + } + + fn create(&self, name: Arc<[u8]>) -> Option> { + let mut handles = self.handles.write(); + + let mut entry = handles.entry(name.clone()); + + if let Entry::Occupied(ref occupied) = entry { + // The handle is in use only if its strong count is greater than zero. + if occupied.get().strong_count() > 0 { + return None; + } + } + + let new_handle = Arc::new(AbstractHandle::new(name)); + let weak_handle = Arc::downgrade(&new_handle); + + match entry { + Entry::Occupied(ref mut occupied) => { + occupied.insert(weak_handle); + } + Entry::Vacant(vacant) => { + vacant.insert(weak_handle); + } + } + + Some(new_handle) + } + + fn remove(&self, name: Arc<[u8]>) { + let mut handles = self.handles.write(); + + let Entry::Occupied(occupied) = handles.entry(name) else { + return; + }; + + // Due to race conditions between `AbstractHandle::drop` and `HandleTable::create`, the + // entry may be occupied by another handle. + // + // Therefore, before removing the entry, we must check again if the entry should be removed. + if occupied.get().strong_count() == 0 { + occupied.remove(); + } + } + + fn lookup(&self, name: &[u8]) -> Option> { + let handles = self.handles.read(); + + handles.get(name).and_then(Weak::upgrade) + } + + fn alloc_ephemeral(&self) -> Option> { + // See "Autobind feature" in the man pages: + // . + // + // Note that false negatives are fine here. So we don't mind race conditions. + // + // TODO: Always starting with the first name is inefficient and leads to contention. + // Instead, we should generate some random names and check their availability. + (0..(1 << 20)) + .map(|num| format!("{:05x}", num)) + .map(|name| Arc::from(name.as_bytes())) + .filter_map(|name| self.create(name)) + .next() + } +} + +pub fn create_abstract_name(name: Arc<[u8]>) -> Result> { + HANDLE_TABLE.create(name).ok_or_else(|| { + Error::with_message(Errno::EADDRINUSE, "the abstract name is already in use") + }) +} + +pub fn alloc_ephemeral_abstract_name() -> Result> { + HANDLE_TABLE.alloc_ephemeral().ok_or_else(|| { + Error::with_message(Errno::ENOSPC, "no ephemeral abstract name is available") + }) +} + +pub fn lookup_abstract_name(name: &[u8]) -> Result> { + HANDLE_TABLE + .lookup(name) + .ok_or_else(|| Error::with_message(Errno::ECONNREFUSED, "the abstract name does not exist")) +} diff --git a/kernel/src/net/socket/unix/ns/mod.rs b/kernel/src/net/socket/unix/ns/mod.rs new file mode 100644 index 000000000..2e65f68a4 --- /dev/null +++ b/kernel/src/net/socket/unix/ns/mod.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MPL-2.0 + +pub(super) use abs::{ + alloc_ephemeral_abstract_name, create_abstract_name, lookup_abstract_name, AbstractHandle, +}; +pub(super) use path::{create_socket_file, lookup_socket_file}; + +mod abs; +mod path; diff --git a/kernel/src/net/socket/unix/ns/path.rs b/kernel/src/net/socket/unix/ns/path.rs new file mode 100644 index 000000000..f57c329a7 --- /dev/null +++ b/kernel/src/net/socket/unix/ns/path.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: MPL-2.0 + +use crate::{ + fs::{ + fs_resolver::{split_path, FsPath}, + path::Dentry, + utils::{InodeMode, InodeType}, + }, + prelude::*, +}; + +pub fn lookup_socket_file(path: &str) -> Result> { + let dentry = { + let current = current!(); + let fs = current.fs().read(); + let fs_path = FsPath::try_from(path)?; + fs.lookup(&fs_path)? + }; + + if !dentry.mode()?.is_readable() || !dentry.mode()?.is_writable() { + return_errno_with_message!(Errno::EACCES, "the socket file cannot be read or written") + } + + if dentry.type_() != InodeType::Socket { + return_errno_with_message!( + Errno::ECONNREFUSED, + "the specified file is not a socket file" + ) + } + + Ok(dentry) +} + +pub fn create_socket_file(path: &str) -> Result> { + let (parent_pathname, file_name) = split_path(path); + + let parent = { + let current = current!(); + let fs = current.fs().read(); + let parent_path = FsPath::try_from(parent_pathname)?; + fs.lookup(&parent_path)? + }; + + parent + .new_fs_child( + file_name, + InodeType::Socket, + InodeMode::S_IRUSR | InodeMode::S_IWUSR, + ) + .map_err(|err| { + if err.error() == Errno::EEXIST { + Error::with_message(Errno::EADDRINUSE, "the socket file already exists") + } else { + err + } + }) +} diff --git a/kernel/src/net/socket/unix/stream/init.rs b/kernel/src/net/socket/unix/stream/init.rs index d9fafb474..74a159cfd 100644 --- a/kernel/src/net/socket/unix/stream/init.rs +++ b/kernel/src/net/socket/unix/stream/init.rs @@ -2,11 +2,6 @@ use crate::{ events::{IoEvents, Observer}, - fs::{ - fs_resolver::{split_path, FsPath}, - path::Dentry, - utils::{InodeMode, InodeType}, - }, net::socket::unix::addr::{UnixSocketAddr, UnixSocketAddrBound}, prelude::*, process::signal::{Pollee, Poller}, @@ -30,14 +25,7 @@ impl Init { return_errno_with_message!(Errno::EINVAL, "the socket is already bound"); } - let bound_addr = match addr_to_bind { - UnixSocketAddr::Unnamed => todo!(), - UnixSocketAddr::Abstract(_) => todo!(), - UnixSocketAddr::Path(path) => { - let dentry = create_socket_file(&path)?; - UnixSocketAddrBound::Path(path, dentry) - } - }; + let bound_addr = addr_to_bind.bind()?; self.addr = Some(bound_addr); Ok(()) @@ -67,19 +55,3 @@ impl Init { self.pollee.unregister_observer(observer) } } - -fn create_socket_file(path: &str) -> Result> { - let (parent_pathname, file_name) = split_path(path); - let parent = { - let current = current!(); - let fs = current.fs().read(); - let parent_path = FsPath::try_from(parent_pathname)?; - fs.lookup(&parent_path)? - }; - let dentry = parent.new_fs_child( - file_name, - InodeType::Socket, - InodeMode::S_IRUSR | InodeMode::S_IWUSR, - )?; - Ok(dentry) -} diff --git a/kernel/src/net/socket/unix/stream/listener.rs b/kernel/src/net/socket/unix/stream/listener.rs index f42edb651..d3c6f6d5a 100644 --- a/kernel/src/net/socket/unix/stream/listener.rs +++ b/kernel/src/net/socket/unix/stream/listener.rs @@ -2,13 +2,14 @@ use core::sync::atomic::{AtomicUsize, Ordering}; -use keyable_arc::KeyableWeak; - use super::{connected::Connected, UnixStreamSocket}; use crate::{ events::{IoEvents, Observer}, - fs::{file_handle::FileLike, path::Dentry, utils::Inode}, - net::socket::{unix::addr::UnixSocketAddrBound, SocketAddr}, + fs::file_handle::FileLike, + net::socket::{ + unix::addr::{UnixSocketAddrBound, UnixSocketAddrKey}, + SocketAddr, + }, prelude::*, process::signal::{Pollee, Poller}, }; @@ -62,15 +63,14 @@ impl Listener { impl Drop for Listener { fn drop(&mut self) { - unregister_backlog(self.backlog.addr()) + unregister_backlog(&self.backlog.addr().to_key()) } } static BACKLOG_TABLE: BacklogTable = BacklogTable::new(); struct BacklogTable { - backlog_sockets: RwLock, Arc>>, - // TODO: For linux, there is also abstract socket domain that a socket addr is not bound to an inode. + backlog_sockets: RwLock>>, } impl BacklogTable { @@ -81,41 +81,30 @@ impl BacklogTable { } fn add_backlog(&self, addr: UnixSocketAddrBound, backlog: usize) -> Option> { - let inode = { - let UnixSocketAddrBound::Path(_, ref dentry) = addr else { - todo!() - }; - create_keyable_inode(dentry) - }; - let new_backlog = Arc::new(Backlog::new(addr, backlog)); + let addr_key = addr.to_key(); let mut backlog_sockets = self.backlog_sockets.write(); - if backlog_sockets.contains_key(&inode) { + + if backlog_sockets.contains_key(&addr_key) { return None; } - backlog_sockets.insert(inode, new_backlog.clone()); + + let new_backlog = Arc::new(Backlog::new(addr, backlog)); + backlog_sockets.insert(addr_key, new_backlog.clone()); Some(new_backlog) } - fn get_backlog(&self, addr: &UnixSocketAddrBound) -> Option> { - let inode = { - let UnixSocketAddrBound::Path(_, dentry) = addr else { - todo!() - }; - create_keyable_inode(dentry) - }; - - let backlog_sockets = self.backlog_sockets.read(); - backlog_sockets.get(&inode).cloned() + fn get_backlog(&self, addr: &UnixSocketAddrKey) -> Option> { + self.backlog_sockets.read().get(addr).cloned() } fn push_incoming( &self, - server_addr: &UnixSocketAddrBound, + server_key: &UnixSocketAddrKey, client_addr: Option, ) -> Result { - let backlog = self.get_backlog(server_addr).ok_or_else(|| { + let backlog = self.get_backlog(server_key).ok_or_else(|| { Error::with_message( Errno::ECONNREFUSED, "no socket is listening at the remote address", @@ -125,13 +114,8 @@ impl BacklogTable { backlog.push_incoming(client_addr) } - fn remove_backlog(&self, addr: &UnixSocketAddrBound) { - let UnixSocketAddrBound::Path(_, dentry) = addr else { - todo!() - }; - - let inode = create_keyable_inode(dentry); - self.backlog_sockets.write().remove(&inode); + fn remove_backlog(&self, addr_key: &UnixSocketAddrKey) { + self.backlog_sockets.write().remove(addr_key); } } @@ -210,18 +194,13 @@ impl Backlog { } } -fn create_keyable_inode(dentry: &Arc) -> KeyableWeak { - let weak_inode = Arc::downgrade(dentry.inode()); - KeyableWeak::from(weak_inode) -} - -fn unregister_backlog(addr: &UnixSocketAddrBound) { +fn unregister_backlog(addr: &UnixSocketAddrKey) { BACKLOG_TABLE.remove_backlog(addr); } pub(super) fn push_incoming( - server_addr: &UnixSocketAddrBound, + server_key: &UnixSocketAddrKey, client_addr: Option, ) -> Result { - BACKLOG_TABLE.push_incoming(server_addr, client_addr) + BACKLOG_TABLE.push_incoming(server_key, client_addr) } diff --git a/kernel/src/net/socket/unix/stream/socket.rs b/kernel/src/net/socket/unix/stream/socket.rs index 3ac0e2014..e5eec582a 100644 --- a/kernel/src/net/socket/unix/stream/socket.rs +++ b/kernel/src/net/socket/unix/stream/socket.rs @@ -11,14 +11,9 @@ use super::{ }; use crate::{ events::{IoEvents, Observer}, - fs::{ - file_handle::FileLike, - fs_resolver::FsPath, - path::Dentry, - utils::{InodeType, StatusFlags}, - }, + fs::{file_handle::FileLike, utils::StatusFlags}, net::socket::{ - unix::{addr::UnixSocketAddrBound, UnixSocketAddr}, + unix::UnixSocketAddr, util::{ copy_message_from_user, copy_message_to_user, create_message_buffer, send_recv_flags::SendRecvFlags, socket_addr::SocketAddr, MessageHeader, @@ -203,19 +198,7 @@ impl Socket for UnixStreamSocket { } fn connect(&self, socket_addr: SocketAddr) -> Result<()> { - let remote_addr = { - let unix_socket_addr = UnixSocketAddr::try_from(socket_addr)?; - match unix_socket_addr { - UnixSocketAddr::Unnamed => todo!(), - UnixSocketAddr::Abstract(abstract_name) => { - UnixSocketAddrBound::Abstract(abstract_name) - } - UnixSocketAddr::Path(path) => { - let dentry = lookup_socket_file(&path)?; - UnixSocketAddrBound::Path(path, dentry) - } - } - }; + let remote_addr = UnixSocketAddr::try_from(socket_addr)?.connect()?; // Note that the Linux kernel implementation locks the remote socket and checks to see if // it is listening first. This is different from our implementation, which locks the local @@ -356,21 +339,3 @@ impl Socket for UnixStreamSocket { Ok((copied_bytes, message_header)) } } - -fn lookup_socket_file(path: &str) -> Result> { - let dentry = { - let current = current!(); - let fs = current.fs().read(); - let fs_path = FsPath::try_from(path)?; - fs.lookup(&fs_path)? - }; - - if dentry.type_() != InodeType::Socket { - return_errno_with_message!(Errno::ENOTSOCK, "not a socket file") - } - - if !dentry.mode()?.is_readable() || !dentry.mode()?.is_writable() { - return_errno_with_message!(Errno::EACCES, "the socket cannot be read or written") - } - Ok(dentry) -} diff --git a/test/apps/network/unix_err.c b/test/apps/network/unix_err.c index e6df2abd3..c497ea6c8 100644 --- a/test/apps/network/unix_err.c +++ b/test/apps/network/unix_err.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -233,3 +234,63 @@ FN_TEST(listen) TEST_ERRNO(listen(sk_accepted, 10), EINVAL); } END_TEST() + +FN_TEST(ns_path) +{ + int fd; + + fd = TEST_SUCC(creat("/tmp/.good", 0644)); + TEST_ERRNO(bind(sk_unbound, (struct sockaddr *)&UNIX_ADDR("/tmp/.good"), + sizeof(struct sockaddr)), + EADDRINUSE); + TEST_ERRNO(connect(sk_unbound, + (struct sockaddr *)&UNIX_ADDR("/tmp/.good"), + sizeof(struct sockaddr)), + ECONNREFUSED); + TEST_SUCC(close(fd)); + TEST_SUCC(unlink("/tmp/.good")); + + fd = TEST_SUCC(creat("/tmp/.bad", 0000)); + TEST_ERRNO(bind(sk_unbound, (struct sockaddr *)&UNIX_ADDR("/tmp/.bad"), + sizeof(struct sockaddr)), + EADDRINUSE); + TEST_ERRNO(connect(sk_unbound, + (struct sockaddr *)&UNIX_ADDR("/tmp/.bad"), + sizeof(struct sockaddr)), + EACCES); + TEST_SUCC(close(fd)); + TEST_SUCC(unlink("/tmp/.bad")); +} +END_TEST() + +FN_TEST(ns_abs) +{ + int sk, sk2; + struct sockaddr_un addr; + socklen_t addrlen; + + sk = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0)); + + TEST_SUCC(bind(sk, (struct sockaddr *)&UNIX_ADDR(""), PATH_OFFSET)); + addrlen = sizeof(addr); + TEST_RES(getsockname(sk, (struct sockaddr *)&addr, &addrlen), + addrlen == PATH_OFFSET + 6 && addr.sun_path[0] == '\0'); + + sk2 = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0)); + + TEST_ERRNO(bind(sk2, (struct sockaddr *)&addr, addrlen), EADDRINUSE); + TEST_ERRNO(connect(sk2, (struct sockaddr *)&addr, addrlen), + ECONNREFUSED); + TEST_SUCC(listen(sk, 1)); + TEST_SUCC(connect(sk2, (struct sockaddr *)&addr, addrlen)); + + TEST_SUCC(close(sk)); + TEST_SUCC(close(sk2)); + + sk = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0)); + TEST_ERRNO(connect(sk, (struct sockaddr *)&addr, addrlen), + ECONNREFUSED); + TEST_SUCC(bind(sk, (struct sockaddr *)&addr, addrlen)); + TEST_SUCC(close(sk)); +} +END_TEST()