diff --git a/include/net/tcp.h b/include/net/tcp.h index 5bc2e174426c..9b183b94acf8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1400,6 +1400,16 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); + +void __sk_defer_free_flush(struct sock *sk); + +static inline void sk_defer_free_flush(struct sock *sk) +{ + if (llist_empty(&sk->defer_list)) + return; + __sk_defer_free_flush(sk); +} + int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 224d576339a6..8bb809e4e800 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1652,14 +1652,34 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } +void __sk_defer_free_flush(struct sock *sk) +{ + struct llist_node *head; + struct sk_buff *skb, *n; + + head = llist_del_all(&sk->defer_list); + llist_for_each_entry_safe(skb, n, head, ll_node) { + prefetch(n); + skb_mark_not_on_list(skb); + __kfree_skb(skb); + } +} +EXPORT_SYMBOL(__sk_defer_free_flush); + static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { + __skb_unlink(skb, &sk->sk_receive_queue); if (likely(skb->destructor == sock_rfree)) { sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; + if (!skb_queue_empty(&sk->sk_receive_queue) || + !llist_empty(&sk->defer_list)) { + llist_add(&skb->ll_node, &sk->defer_list); + return; + } } - sk_eat_skb(sk, skb); + __kfree_skb(skb); } struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) @@ -2624,6 +2644,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else { + sk_defer_free_flush(sk); err = sk_wait_data(sk, &timeo, last); if (err < 0) { err = copied ? : err; @@ -2746,6 +2767,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, &cmsg_flags); release_sock(sk); + sk_defer_free_flush(sk); if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) @@ -3278,7 +3300,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; } - + sk_defer_free_flush(sk); sk_error_report(sk); return 0; } @@ -4408,6 +4430,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, &zc, &len, err); sockopt_release_sock(sk); + sk_defer_free_flush(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) goto zerocopy_rcv_cmsg; switch (len) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46cfc00e02ff..93ca9f23ab3f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2077,6 +2077,7 @@ process: sk_incoming_cpu_update(sk); + sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 926df1eb2d64..e803be77b36b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1732,6 +1732,7 @@ process: sk_incoming_cpu_update(sk); + sk_defer_free_flush(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0;