git.dujemihanovic.xyz Git - linux.git/commitdiff
tcp: fix TCP_USER_TIMEOUT with zero window
author Enke Chen <enchen@paloaltonetworks.com>
Fri, 15 Jan 2021 22:30:58 +0000 (14:30 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 19 Jan 2021 03:59:17 +0000 (19:59 -0800)
The TCP session does not terminate with TCP_USER_TIMEOUT when data
remain untransmitted due to zero window.

The number of unanswered zero-window probes (icsk_probes_out) is
reset to zero with incoming acks irrespective of the window size,
as described in tcp_probe_timer():

    RFC 1122 4.2.2.17 requires the sender to stay open indefinitely
    as long as the receiver continues to respond probes. We support
    this by default and reset icsk_probes_out with incoming ACKs.

This counter, however, is the wrong one to be used in calculating the
duration that the window remains closed and data remain untransmitted.
Thanks to Jonathan Maxwell <jmaxwell37@gmail.com> for diagnosing the
actual issue.

In this patch a new timestamp is introduced for the socket in order to
track the elapsed time for the zero-window probes that have not been
answered with any non-zero window ack.

Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT")
Reported-by: William McCall <william.mccall@gmail.com>
Co-developed-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Enke Chen <enchen@paloaltonetworks.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20210115223058.GA39267@localhost.localdomain
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/inet_connection_sock.h
net/ipv4/inet_connection_sock.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c

index 7338b3865a2a3d278dc27c0167bba1b966bbda9f..111d7771b208150d4e6f445b2675648b4de0eb7d 100644 (file)
@@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops {
  * @icsk_ext_hdr_len:     Network protocol overhead (IP/IPv6 options)
  * @icsk_ack:             Delayed ACK control data
  * @icsk_mtup;            MTU probing control data
+ * @icsk_probes_tstamp:    Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout:    TCP_USER_TIMEOUT value
  */
 struct inet_connection_sock {
        /* inet_sock has to be the first member! */
@@ -129,6 +131,7 @@ struct inet_connection_sock {
 
                u32               probe_timestamp;
        } icsk_mtup;
+       u32                       icsk_probes_tstamp;
        u32                       icsk_user_timeout;
 
        u64                       icsk_ca_priv[104 / sizeof(u64)];
index fd8b8800a2c3022666f46b9ba2ac984f7cf6b04d..6bd7ca09af03dd5385096f749cf05afecb4b7795 100644 (file)
@@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
                newicsk->icsk_retransmits = 0;
                newicsk->icsk_backoff     = 0;
                newicsk->icsk_probes_out  = 0;
+               newicsk->icsk_probes_tstamp = 0;
 
                /* Deinitialize accept_queue to trap illegal accesses. */
                memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
index ed42d2193c5c76bc9d48f36c13e72ca5be8aee1f..32545ecf2ab105739e9a751b5202a7b3c9d6b22e 100644 (file)
@@ -2937,6 +2937,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
        icsk->icsk_backoff = 0;
        icsk->icsk_probes_out = 0;
+       icsk->icsk_probes_tstamp = 0;
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
        icsk->icsk_rto_min = TCP_RTO_MIN;
        icsk->icsk_delack_max = TCP_DELACK_MAX;
index c7e16b0ed791fcbd864860d6216339542e286929..bafcab75f4256486b18ed5a87399b926be5bc047 100644 (file)
@@ -3384,6 +3384,7 @@ static void tcp_ack_probe(struct sock *sk)
                return;
        if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
                icsk->icsk_backoff = 0;
+               icsk->icsk_probes_tstamp = 0;
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
                /* Socket must be waked up by subsequent tcp_data_snd_check().
                 * This function is not for random using!
index f322e798a3519153472434a0a4a85449a2da20ce..ab458697881eda4503d6f223480deadf754d2d22 100644 (file)
@@ -4084,6 +4084,7 @@ void tcp_send_probe0(struct sock *sk)
                /* Cancel probe timer, if it is not required. */
                icsk->icsk_probes_out = 0;
                icsk->icsk_backoff = 0;
+               icsk->icsk_probes_tstamp = 0;
                return;
        }
 
index 6c62b9ea1320d9bbd26ed86b9f41de02fee6c491..454732ecc8f33833534922d7761a7df5344f21c8 100644 (file)
@@ -349,6 +349,7 @@ static void tcp_probe_timer(struct sock *sk)
 
        if (tp->packets_out || !skb) {
                icsk->icsk_probes_out = 0;
+               icsk->icsk_probes_tstamp = 0;
                return;
        }
 
@@ -360,13 +361,12 @@ static void tcp_probe_timer(struct sock *sk)
         * corresponding system limit. We also implement similar policy when
         * we use RTO to probe window in tcp_retransmit_timer().
         */
-       if (icsk->icsk_user_timeout) {
-               u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
-                                               tcp_probe0_base(sk));
-
-               if (elapsed >= icsk->icsk_user_timeout)
-                       goto abort;
-       }
+       if (!icsk->icsk_probes_tstamp)
+               icsk->icsk_probes_tstamp = tcp_jiffies32;
+       else if (icsk->icsk_user_timeout &&
+                (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+                msecs_to_jiffies(icsk->icsk_user_timeout))
+               goto abort;
 
        max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
        if (sock_flag(sk, SOCK_DEAD)) {