tcp: introduce tcp_under_memory_pressure()
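The new helper itself lives in include/net/tcp.h, so it does not appear in this tcp_input.c diff; the hunks below only switch call sites from the generic sk_under_memory_pressure() to it. A rough sketch of its likely shape, assuming the per-memcg socket accounting of this kernel generation (the sk_cgrp details are an assumption, not taken from this page):

static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
        /* Prefer the memory-pressure state of this socket's own cgroup,
         * when cgroup socket accounting is active; otherwise fall back
         * to the global TCP memory-pressure flag. */
        if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
                return !!sk->sk_cgrp->memory_pressure;

        return tcp_memory_pressure;
}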
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed44a2f6b77b2be0e753867bad32292..093779f7e8930dad58aa60c00f6a1225c4cc20e2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
        /* Check #1 */
        if (tp->rcv_ssthresh < tp->window_clamp &&
            (int)tp->rcv_ssthresh < tcp_space(sk) &&
-           !sk_under_memory_pressure(sk)) {
+           !tcp_under_memory_pressure(sk)) {
                int incr;
 
                /* Check #2. Increase window, if skb with such overhead
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
 
        if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-           !sk_under_memory_pressure(sk) &&
+           !tcp_under_memory_pressure(sk) &&
            sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
                sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
                                    sysctl_tcp_rmem[2]);
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 struct tcp_sacktag_state {
        int     reord;
        int     fack_count;
-       long    rtt_us; /* RTT measured by SACKing never-retransmitted data */
+       /* Timestamps for earliest and latest never-retransmitted segment
+        * that was SACKed. RTO needs the earliest RTT to stay conservative,
+        * but congestion control should still get an accurate delay signal.
+        */
+       struct skb_mstamp first_sackt;
+       struct skb_mstamp last_sackt;
        int     flag;
 };
 
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                                           state->reord);
                                if (!after(end_seq, tp->high_seq))
                                        state->flag |= FLAG_ORIG_SACK_ACKED;
-                               /* Pick the earliest sequence sacked for RTT */
-                               if (state->rtt_us < 0) {
-                                       struct skb_mstamp now;
-
-                                       skb_mstamp_get(&now);
-                                       state->rtt_us = skb_mstamp_us_delta(&now,
-                                                               xmit_time);
-                               }
+                               if (state->first_sackt.v64 == 0)
+                                       state->first_sackt = *xmit_time;
+                               state->last_sackt = *xmit_time;
                        }
 
                        if (sacked & TCPCB_LOST) {
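A standalone illustration (not kernel code) of why two timestamps are now kept: when one ACK SACKs several never-retransmitted segments, the earliest transmit time gives the largest and therefore most conservative RTT sample for the retransmission timer, while the latest gives the freshest sample for congestion control. As in tcp_sacktag_one() above, first_sackt is set only once and last_sackt is overwritten on every hit.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Transmit times (us) of three segments covered by one SACK block. */
        uint64_t xmit_us[] = { 1000, 1400, 1800 };
        uint64_t now_us = 3000;
        uint64_t first_sackt = 0, last_sackt = 0;

        for (int i = 0; i < 3; i++) {
                if (!first_sackt)
                        first_sackt = xmit_us[i];   /* keep the earliest */
                last_sackt = xmit_us[i];            /* overwrite with the latest */
        }

        /* RTO sample: 2000 us; congestion-control sample: 1200 us. */
        printf("rto_rtt=%llu us  ca_rtt=%llu us\n",
               (unsigned long long)(now_us - first_sackt),
               (unsigned long long)(now_us - last_sackt));
        return 0;
}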
@@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-                       u32 prior_snd_una, long *sack_rtt_us)
+                       u32 prior_snd_una, struct tcp_sacktag_state *state)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
        struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
        struct tcp_sack_block sp[TCP_NUM_SACKS];
        struct tcp_sack_block *cache;
-       struct tcp_sacktag_state state;
        struct sk_buff *skb;
        int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
        int used_sacks;
@@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
        int i, j;
        int first_sack_index;
 
-       state.flag = 0;
-       state.reord = tp->packets_out;
-       state.rtt_us = -1L;
+       state->flag = 0;
+       state->reord = tp->packets_out;
 
        if (!tp->sacked_out) {
                if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
        found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
                                         num_sacks, prior_snd_una);
        if (found_dup_sack)
-               state.flag |= FLAG_DSACKING_ACK;
+               state->flag |= FLAG_DSACKING_ACK;
 
        /* Eliminate too old ACKs, but take into
         * account more or less fresh ones, they can
@@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
        }
 
        skb = tcp_write_queue_head(sk);
-       state.fack_count = 0;
+       state->fack_count = 0;
        i = 0;
 
        if (!tp->sacked_out) {
@@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
                        /* Head todo? */
                        if (before(start_seq, cache->start_seq)) {
-                               skb = tcp_sacktag_skip(skb, sk, &state,
+                               skb = tcp_sacktag_skip(skb, sk, state,
                                                       start_seq);
                                skb = tcp_sacktag_walk(skb, sk, next_dup,
-                                                      &state,
+                                                      state,
                                                       start_seq,
                                                       cache->start_seq,
                                                       dup_sack);
@@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                                goto advance_sp;
 
                        skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
-                                                      &state,
+                                                      state,
                                                       cache->end_seq);
 
                        /* ...tail remains todo... */
@@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                                skb = tcp_highest_sack(sk);
                                if (!skb)
                                        break;
-                               state.fack_count = tp->fackets_out;
+                               state->fack_count = tp->fackets_out;
                                cache++;
                                goto walk;
                        }
 
-                       skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+                       skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
                        /* Check overlap against next cached too (past this one already) */
                        cache++;
                        continue;
@@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                        skb = tcp_highest_sack(sk);
                        if (!skb)
                                break;
-                       state.fack_count = tp->fackets_out;
+                       state->fack_count = tp->fackets_out;
                }
-               skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+               skb = tcp_sacktag_skip(skb, sk, state, start_seq);
 
 walk:
-               skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+               skb = tcp_sacktag_walk(skb, sk, next_dup, state,
                                       start_seq, end_seq, dup_sack);
 
 advance_sp:
@@ -1820,14 +1818,12 @@ advance_sp:
        for (j = 0; j < used_sacks; j++)
                tp->recv_sack_cache[i++] = sp[j];
 
-       tcp_mark_lost_retrans(sk);
-
-       tcp_verify_left_out(tp);
-
-       if ((state.reord < tp->fackets_out) &&
+       if ((state->reord < tp->fackets_out) &&
            ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-               tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+               tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
+       tcp_mark_lost_retrans(sk);
+       tcp_verify_left_out(tp);
 out:
 
 #if FASTRETRANS_DEBUG > 0
@@ -1836,8 +1832,7 @@ out:
        WARN_ON((int)tp->retrans_out < 0);
        WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-       *sack_rtt_us = state.rtt_us;
-       return state.flag;
+       return state->flag;
 }
 
 /* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -3054,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                              u32 prior_snd_una, long sack_rtt_us)
+                              u32 prior_snd_una,
+                              struct tcp_sacktag_state *sack)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct skb_mstamp first_ackt, last_ackt, now;
@@ -3062,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
        u32 prior_sacked = tp->sacked_out;
        u32 reord = tp->packets_out;
        bool fully_acked = true;
-       long ca_seq_rtt_us = -1L;
+       long sack_rtt_us = -1L;
        long seq_rtt_us = -1L;
+       long ca_rtt_us = -1L;
        struct sk_buff *skb;
        u32 pkts_acked = 0;
        bool rtt_update;
@@ -3152,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
        skb_mstamp_get(&now);
        if (likely(first_ackt.v64)) {
                seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
-               ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+               ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+       }
+       if (sack->first_sackt.v64) {
+               sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+               ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
        }
 
        rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
        if (flag & FLAG_ACKED) {
-               const struct tcp_congestion_ops *ca_ops
-                       = inet_csk(sk)->icsk_ca_ops;
-
                tcp_rearm_rto(sk);
                if (unlikely(icsk->icsk_mtup.probe_size &&
                             !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3183,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
                tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-               if (ca_ops->pkts_acked) {
-                       long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
-                       ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-               }
-
        } else if (skb && rtt_update && sack_rtt_us >= 0 &&
                   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
                /* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3197,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                tcp_rearm_rto(sk);
        }
 
+       if (icsk->icsk_ca_ops->pkts_acked)
+               icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
 #if FASTRETRANS_DEBUG > 0
        WARN_ON((int)tp->sacked_out < 0);
        WARN_ON((int)tp->lost_out < 0);
@@ -3237,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk)
                 * This function is not for random using!
                 */
        } else {
-               unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+               unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
                                          when, TCP_RTO_MAX);
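tcp_probe0_when() is a tcp.h helper added alongside this change. Unlike inet_csk_rto_backoff(), it applies the exponential backoff to an RTO that has first been clamped to at least TCP_RTO_MIN, so zero-window probes cannot fire at pathologically short intervals when the measured RTO is tiny. A sketch of its likely shape (an assumption, not copied from this page):

static inline unsigned long tcp_probe0_base(const struct sock *sk)
{
        return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
}

/* Something between TCP_RTO_MIN and TCP_RTO_MAX */
static inline unsigned long tcp_probe0_when(const struct sock *sk,
                                            unsigned long max_when)
{
        u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;

        return (unsigned long)min_t(u64, when, max_when);
}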
@@ -3280,6 +3276,24 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
                (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+       u32 delta = ack - tp->snd_una;
+
+       tp->bytes_acked += delta;
+       tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+       u32 delta = seq - tp->rcv_nxt;
+
+       tp->bytes_received += delta;
+       tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3329,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
                }
        }
 
-       tp->snd_una = ack;
+       tcp_snd_una_update(tp, ack);
 
        return flag;
 }
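The unsigned 32-bit subtraction in tcp_snd_una_update() and tcp_rcv_nxt_update() is wrap-safe: sequence numbers live modulo 2^32, so the delta is the true advance even across a wrap, and accumulating it into the wider bytes_acked / bytes_received counters keeps them monotonic. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t bytes_acked = 0;
        uint32_t snd_una = 0xffffff00u;   /* just below the 2^32 wrap */
        uint32_t ack = 0x00000100u;       /* cumulative ACK after the wrap */

        uint32_t delta = ack - snd_una;   /* modular distance: 0x200 = 512 */
        bytes_acked += delta;
        snd_una = ack;

        printf("delta=%u bytes, total=%llu\n",
               delta, (unsigned long long)bytes_acked);   /* 512, 512 */
        return 0;
}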
@@ -3443,6 +3457,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
+       struct tcp_sacktag_state sack_state;
        u32 prior_snd_una = tp->snd_una;
        u32 ack_seq = TCP_SKB_CB(skb)->seq;
        u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3451,7 +3466,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        int prior_packets = tp->packets_out;
        const int prior_unsacked = tp->packets_out - tp->sacked_out;
        int acked = 0; /* Number of packets newly acked */
-       long sack_rtt_us = -1L;
+
+       sack_state.first_sackt.v64 = 0;
 
        /* We very likely will need to access write queue head. */
        prefetchw(sk->sk_write_queue.next);
@@ -3497,7 +3513,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                 * Note, we use the fact that SND.UNA>=SND.WL2.
                 */
                tcp_update_wl(tp, ack_seq);
-               tp->snd_una = ack;
+               tcp_snd_una_update(tp, ack);
                flag |= FLAG_WIN_UPDATE;
 
                tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3515,7 +3531,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
                if (TCP_SKB_CB(skb)->sacked)
                        flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-                                                       &sack_rtt_us);
+                                                       &sack_state);
 
                if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
                        flag |= FLAG_ECE;
@@ -3540,7 +3556,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        /* See if we can take anything off of the retransmit queue. */
        acked = tp->packets_out;
        flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
-                                   sack_rtt_us);
+                                   &sack_state);
        acked -= tp->packets_out;
 
        /* Advance cwnd if state allows */
@@ -3592,7 +3608,7 @@ old_ack:
         */
        if (TCP_SKB_CB(skb)->sacked) {
                flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-                                               &sack_rtt_us);
+                                               &sack_state);
                tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                      is_dupack, flag);
        }
@@ -4236,7 +4252,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
                tail = skb_peek_tail(&sk->sk_receive_queue);
                eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (!eaten)
                        __skb_queue_tail(&sk->sk_receive_queue, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4420,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
        __skb_pull(skb, hdrlen);
        eaten = (tail &&
                 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-       tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
                __skb_queue_tail(&sk->sk_receive_queue, skb);
                skb_set_owner_r(skb, sk);
@@ -4497,7 +4513,7 @@ queue_and_out:
 
                        eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
                }
-               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (skb->len)
                        tcp_event_data_recv(sk, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4765,7 +4781,7 @@ static int tcp_prune_queue(struct sock *sk)
 
        if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
                tcp_clamp_window(sk);
-       else if (sk_under_memory_pressure(sk))
+       else if (tcp_under_memory_pressure(sk))
                tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
        tcp_collapse_ofo_queue(sk);
@@ -4809,7 +4825,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
                return false;
 
        /* If we are under global TCP memory pressure, do not expand.  */
-       if (sk_under_memory_pressure(sk))
+       if (tcp_under_memory_pressure(sk))
                return false;
 
        /* If we are under soft global TCP memory pressure, do not expand.  */
@@ -5245,7 +5261,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                        tcp_rcv_rtt_measure_ts(sk, skb);
 
                                        __skb_pull(skb, tcp_header_len);
-                                       tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+                                       tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
                                        eaten = 1;
                                }
@@ -6044,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
        return want_cookie;
 }
 
+static void tcp_reqsk_record_syn(const struct sock *sk,
+                                struct request_sock *req,
+                                const struct sk_buff *skb)
+{
+       if (tcp_sk(sk)->save_syn) {
+               u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+               u32 *copy;
+
+               copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+               if (copy) {
+                       copy[0] = len;
+                       memcpy(&copy[1], skb_network_header(skb), len);
+                       req->saved_syn = copy;
+               }
+       }
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
                     const struct tcp_request_sock_ops *af_ops,
                     struct sock *sk, struct sk_buff *skb)
@@ -6176,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                tcp_rsk(req)->tfo_listener = false;
                af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
        }
+       tcp_reqsk_record_syn(sk, req, skb);
 
        return 0;
 
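tcp_reqsk_record_syn() stores the length-prefixed network and TCP headers of the original SYN on the request sock when the listener has opted in. A hypothetical user-space sketch of how such a saved SYN could be retrieved, assuming the TCP_SAVE_SYN / TCP_SAVED_SYN socket options that accompany this change (the option values are defined locally in case system headers predate them):

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef TCP_SAVE_SYN
#define TCP_SAVE_SYN    27      /* assumed option value */
#endif
#ifndef TCP_SAVED_SYN
#define TCP_SAVED_SYN   28      /* assumed option value */
#endif

/* Enable saving on the listener before any SYN arrives. */
static int enable_save_syn(int listen_fd)
{
        int one = 1;

        return setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN,
                          &one, sizeof(one));
}

/* After accept(): fetch the headers recorded by tcp_reqsk_record_syn(). */
static int dump_saved_syn(int conn_fd)
{
        unsigned char syn[512];
        socklen_t len = sizeof(syn);

        if (getsockopt(conn_fd, IPPROTO_TCP, TCP_SAVED_SYN, syn, &len) < 0)
                return -1;
        printf("saved SYN headers: %u bytes\n", (unsigned)len);
        return 0;
}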