git.dujemihanovic.xyz Git - linux.git/commitdiff
tcp: Update window clamping condition
author: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Thu, 8 Aug 2024 23:06:40 +0000 (16:06 -0700)
committer: David S. Miller <davem@davemloft.net>
Wed, 14 Aug 2024 09:50:49 +0000 (10:50 +0100)
This patch is based on the discussion between Neal Cardwell and
Eric Dumazet at the following link:
https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/

It was correctly pointed out that tp->window_clamp would not be
updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if
(copied <= tp->rcvq_space.space). While most setups are expected
to leave the sysctl enabled, the latter check can still prevent
the update from ever being reached, depending on the TCP receive
queue size and the pattern of arriving data.

The updated check should be hit only on the initial MSS update from
TCP_MIN_MSS to the measured MSS value, and subsequently only if there
is an update to a larger value.

Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF")
Signed-off-by: Sean Tranchetti <quic_stranche@quicinc.com>
Signed-off-by: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp_input.c

index e2b9583ed96abc8da45121128210afdfcf4e5d23..e37488d3453f03b68928718a69559581b436812b 100644 (file)
@@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
                 */
                if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
                        u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+                       u8 old_ratio = tcp_sk(sk)->scaling_ratio;
 
                        do_div(val, skb->truesize);
                        tcp_sk(sk)->scaling_ratio = val ? val : 1;
+
+                       if (old_ratio != tcp_sk(sk)->scaling_ratio)
+                               WRITE_ONCE(tcp_sk(sk)->window_clamp,
+                                          tcp_win_from_space(sk, sk->sk_rcvbuf));
                }
                icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
                                               tcp_sk(sk)->advmss);
@@ -754,7 +759,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
         * <prev RTT . ><current RTT .. ><next RTT .... >
         */
 
-       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) {
+       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
                u64 rcvwin, grow;
                int rcvbuf;
 
@@ -770,22 +776,12 @@ void tcp_rcv_space_adjust(struct sock *sk)
 
                rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
                               READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
-               if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-                       if (rcvbuf > sk->sk_rcvbuf) {
-                               WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
-
-                               /* Make the window clamp follow along.  */
-                               WRITE_ONCE(tp->window_clamp,
-                                          tcp_win_from_space(sk, rcvbuf));
-                       }
-               } else {
-                       /* Make the window clamp follow along while being bounded
-                        * by SO_RCVBUF.
-                        */
-                       int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf));
+               if (rcvbuf > sk->sk_rcvbuf) {
+                       WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
 
-                       if (clamp > tp->window_clamp)
-                               WRITE_ONCE(tp->window_clamp, clamp);
+                       /* Make the window clamp follow along.  */
+                       WRITE_ONCE(tp->window_clamp,
+                                  tcp_win_from_space(sk, rcvbuf));
                }
        }
        tp->rcvq_space.space = copied;