From a2cbb1603943281a604f5adc48079a148db5cb0d Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 8 Aug 2024 16:06:40 -0700 Subject: [PATCH] tcp: Update window clamping condition This patch is based on the discussions between Neal Cardwell and Eric Dumazet in the link https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/ It was correctly pointed out that tp->window_clamp would not be updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if (copied <= tp->rcvq_space.space). While it is expected for most setups to leave the sysctl enabled, the latter condition may not end up hitting depending on the TCP receive queue size and the pattern of arriving data. The updated check should be hit only on initial MSS update from TCP_MIN_MSS to measured MSS value and subsequently if there was an update to a larger value. Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e2b9583ed96a..e37488d3453f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); @@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { u64 rcvwin, grow; int rcvbuf; @@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - if (rcvbuf > sk->sk_rcvbuf) { - WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); - - /* Make the window clamp follow along. */ - WRITE_ONCE(tp->window_clamp, - tcp_win_from_space(sk, rcvbuf)); - } - } else { - /* Make the window clamp follow along while being bounded - * by SO_RCVBUF. - */ - int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf)); + if (rcvbuf > sk->sk_rcvbuf) { + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); - if (clamp > tp->window_clamp) - WRITE_ONCE(tp->window_clamp, clamp); + /* Make the window clamp follow along. */ + WRITE_ONCE(tp->window_clamp, + tcp_win_from_space(sk, rcvbuf)); } } tp->rcvq_space.space = copied; -- 2.39.5