Centos-kernel-stream-9/net/ipv6/ping.c

290 lines
7.8 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* "Ping" sockets
*
* Based on ipv4/ping.c code.
*
* Authors: Lorenzo Colitti (IPv6 support)
* Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
* Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
*/
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/transp_v6.h>
#include <linux/proc_fs.h>
#include <linux/bpf-cgroup.h>
#include <net/ping.h>
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
{
return -EAFNOSUPPORT;
}
static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
struct sk_buff *skb)
{
}
static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
{
return -EAFNOSUPPORT;
}
static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload) {}
static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
return 0;
}
static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* This check is replicated from __ip6_datagram_connect() and
* intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
*/
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
}
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct icmp6hdr user_icmph;
int addr_type;
struct in6_addr *daddr;
int oif = 0;
struct flowi6 fl6;
int err;
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
struct ipcm6_cookie ipc6;
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
sizeof(user_icmph));
if (err)
return err;
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
if (msg->msg_namelen < sizeof(*u))
return -EINVAL;
if (u->sin6_family != AF_INET6) {
return -EAFNOSUPPORT;
}
daddr = &(u->sin6_addr);
if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
oif = u->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
}
if (!oif)
oif = sk->sk_bound_dev_if;
if (!oif)
oif = np->sticky_pktinfo.ipi6_ifindex;
if (!oif && ipv6_addr_is_multicast(daddr))
oif = np->mcast_oif;
else if (!oif)
oif = np->ucast_oif;
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
(addr_type & IPV6_ADDR_MAPPED) ||
ping6: Fix send to link-local addresses with VRF. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218130 Upstream Status: net.git commit 91ffd1bae1da commit 91ffd1bae1dafbb9e34b46813f5b058581d9144d Author: Guillaume Nault <gnault@redhat.com> Date: Wed Jun 7 18:05:02 2023 +0200 ping6: Fix send to link-local addresses with VRF. Ping sockets can't send packets when they're bound to a VRF master device and the output interface is set to a slave device. For example, when net.ipv4.ping_group_range is properly set, so that ping6 can use ping sockets, the following kind of commands fails: $ ip vrf exec red ping6 fe80::854:e7ff:fe88:4bf1%eth1 What happens is that sk->sk_bound_dev_if is set to the VRF master device, but 'oif' is set to the real output device. Since both are set but different, ping_v6_sendmsg() sees their value as inconsistent and fails. Fix this by allowing 'oif' to be a slave device of ->sk_bound_dev_if. This fixes the following kselftest failure: $ ./fcnal-test.sh -t ipv6_ping [...] TEST: ping out, vrf device+address bind - ns-B IPv6 LLA [FAIL] Reported-by: Mirsad Todorovac <mirsad.todorovac@alu.unizg.hr> Closes: https://lore.kernel.org/netdev/b6191f90-ffca-dbca-7d06-88a9788def9c@alu.unizg.hr/ Tested-by: Mirsad Todorovac <mirsad.todorovac@alu.unizg.hr> Fixes: 5e457896986e ("net: ipv6: Fix ping to link-local addresses.") Signed-off-by: Guillaume Nault <gnault@redhat.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://lore.kernel.org/r/6c8b53108816a8d0d5705ae37bdc5a8322b5e3d9.1686153846.git.gnault@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Hangbin Liu <haliu@redhat.com>
2023-06-29 01:43:51 +00:00
(oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if &&
l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = sk->sk_mark;
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
ipcm6_init_sk(&ipc6, np);
ipc6.sockc.mark = sk->sk_mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
rt = (struct rt6_info *) dst;
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = np->mcast_oif;
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
pfh.icmph.type = user_icmph.icmp6_type;
pfh.icmph.code = user_icmph.icmp6_code;
pfh.icmph.checksum = 0;
pfh.icmph.un.echo.id = inet->inet_sport;
pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
pfh.msg = msg;
pfh.wcheck = 0;
pfh.family = AF_INET6;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
0, &ipc6, &fl6, rt,
MSG_DONTWAIT);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
icmpv6_push_pending_frames(sk, &fl6,
(struct icmp6hdr *)&pfh.icmph, len);
}
release_sock(sk);
dst_release(dst);
if (err)
return err;
return len;
}
struct proto pingv6_prot = {
.name = "PINGv6",
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
.pre_connect = ping_v6_pre_connect,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.sendmsg = ping_v6_sendmsg,
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2071620 commit 91a760b26926265a60c77ddf016529bcf3e17a04 Author: Menglong Dong <imagedong@tencent.com> Date: Thu Jan 6 21:20:20 2022 +0800 net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in __inet_bind() is not handled properly. While the return value is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and exit: err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; goto out_release_sock; } Let's take UDP for example and see what will happen. For UDP socket, it will be added to 'udp_prot.h.udp_table->hash' and 'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port() called success. If 'inet->inet_rcv_saddr' is specified here, then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong to (because inet_saddr is changed to 0), and UDP packet received will not be passed to this sock. If 'inet->inet_rcv_saddr' is not specified here, the sock will work fine, as it can receive packet properly, which is wired, as the 'bind()' is already failed. To undo the get_port() operation, introduce the 'put_port' field for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP proto, it is udp_lib_unhash(); For icmp proto, it is ping_unhash(). Therefore, after sys_bind() fail caused by BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which means that it can try to be binded to another port. Signed-off-by: Menglong Dong <imagedong@tencent.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20220106132022.3470772-2-imagedong@tencent.com Signed-off-by: Felix Maurer <fmaurer@redhat.com>
2022-08-24 14:53:48 +00:00
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
ipv6: remove hard coded limitation on ipv6_pinfo JIRA: https://issues.redhat.com/browse/RHEL-31050 Upstream Status: net.git commit f5f80e32de12 Conflicts: context conflicts due to missing commit 67fb43308f4b ("udp: Set NULL to sk->sk_prot->h.udp_table."). commit f5f80e32de12fad2813d37270e8364a03e6d3ef0 Author: Eric Dumazet <edumazet@google.com> Date: Thu Jul 20 11:09:01 2023 +0000 ipv6: remove hard coded limitation on ipv6_pinfo IPv6 inet sockets are supposed to have a "struct ipv6_pinfo" field at the end of their definition, so that inet6_sk_generic() can derive from socket size the offset of the "struct ipv6_pinfo". This is very fragile, and prevents adding bigger alignment in sockets, because inet6_sk_generic() does not work if the compiler adds padding after the ipv6_pinfo component. We are currently working on a patch series to reorganize TCP structures for better data locality and found issues similar to the one fixed in commit f5d547676ca0 ("tcp: fix tcp_inet6_sk() for 32bit kernels") Alternative would be to force an alignment on "struct ipv6_pinfo", greater or equal to __alignof__(any ipv6 sock) to ensure there is no padding. This does not look great. v2: fix typo in mptcp_proto_v6_init() (Paolo) Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Chao Wu <wwchao@google.com> Cc: Wei Wang <weiwan@google.com> Cc: Coco Li <lixiaoyan@google.com> Cc: YiFei Zhu <zhuyifei@google.com> Reviewed-by: Simon Horman <simon.horman@corigine.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Hangbin Liu <haliu@redhat.com>
2024-04-02 09:44:44 +00:00
.ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
static struct inet_protosw pingv6_protosw = {
.type = SOCK_DGRAM,
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
.ops = &inet6_sockraw_ops,
.flags = INET_PROTOSW_REUSE,
};
#ifdef CONFIG_PROC_FS
static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
{
return ping_seq_start(seq, pos, AF_INET6);
}
static int ping_v6_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN) {
seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
} else {
int bucket = ((struct ping_iter_state *) seq->private)->bucket;
struct inet_sock *inet = inet_sk((struct sock *)v);
__u16 srcp = ntohs(inet->inet_sport);
__u16 destp = ntohs(inet->inet_dport);
ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
}
return 0;
}
static const struct seq_operations ping_v6_seq_ops = {
.start = ping_v6_seq_start,
.show = ping_v6_seq_show,
.next = ping_seq_next,
.stop = ping_seq_stop,
};
static int __net_init ping_v6_proc_init_net(struct net *net)
{
if (!proc_create_net("icmp6", 0444, net->proc_net, &ping_v6_seq_ops,
sizeof(struct ping_iter_state)))
return -ENOMEM;
return 0;
}
static void __net_exit ping_v6_proc_exit_net(struct net *net)
{
remove_proc_entry("icmp6", net->proc_net);
}
static struct pernet_operations ping_v6_net_ops = {
.init = ping_v6_proc_init_net,
.exit = ping_v6_proc_exit_net,
};
#endif
int __init pingv6_init(void)
{
#ifdef CONFIG_PROC_FS
int ret = register_pernet_subsys(&ping_v6_net_ops);
if (ret)
return ret;
#endif
pingv6_ops.ipv6_recv_error = ipv6_recv_error;
pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl;
pingv6_ops.ip6_datagram_recv_specific_ctl =
ip6_datagram_recv_specific_ctl;
pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
return inet6_register_protosw(&pingv6_protosw);
}
/* This never gets called because it's not possible to unload the ipv6 module,
* but just in case.
*/
void pingv6_exit(void)
{
pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl;
pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl;
pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
#ifdef CONFIG_PROC_FS
unregister_pernet_subsys(&ping_v6_net_ops);
#endif
inet6_unregister_protosw(&pingv6_protosw);
}