Skip to content

Commit

Permalink
tcp: record most recent RTT in RACK loss detection
Browse files Browse the repository at this point in the history
Record the most recent RTT in RACK. It is often identical to the
"ca_rtt_us" values in tcp_clean_rtx_queue. But when the packet has
been retransmitted, RACK choses to believe the ACK is for the
(latest) retransmitted packet if the RTT is over minimum RTT.

This requires passing the arrival time of the most recent ACK to
RACK routines. The timestamp is now recorded in the "ack_time"
in tcp_sacktag_state during the ACK processing.

This patch does not change the RACK algorithm itself. It only adds
the RTT variable to prepare the next main patch.

Signed-off-by: Yuchung Cheng <[email protected]>
Signed-off-by: Neal Cardwell <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
yuchungcheng authored and davem330 committed Jan 14, 2017
1 parent e636f8b commit deed7be
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 35 deletions.
1 change: 1 addition & 0 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ struct tcp_sock {
/* Information of the most recently (s)acked skb */
struct tcp_rack {
struct skb_mstamp mstamp; /* (Re)sent time of the skb */
u32 rtt_us; /* Associated RTT */
u8 advanced; /* mstamp advanced since last lost marking */
u8 reord; /* reordering detected */
} rack;
Expand Down
7 changes: 4 additions & 3 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1863,9 +1863,10 @@ extern int sysctl_tcp_recovery;
/* Use TCP RACK to detect (some) tail and retransmit losses */
#define TCP_RACK_LOST_RETRANS 0x1

extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp,
const struct skb_mstamp *xmit_time, u8 sacked);
extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time);

/*
* Save and compile IPv4 options, return a pointer to it
Expand Down
36 changes: 22 additions & 14 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,7 @@ struct tcp_sacktag_state {
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
struct rate_sample *rate;
int flag;
};
Expand Down Expand Up @@ -1217,7 +1218,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked;

if (!(sacked & TCPCB_SACKED_ACKED)) {
tcp_rack_advance(tp, xmit_time, sacked);
tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time);

if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
Expand Down Expand Up @@ -2813,7 +2814,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
bool is_dupack, int *ack_flag, int *rexmit)
bool is_dupack, int *ack_flag, int *rexmit,
const struct skb_mstamp *ack_time)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
Expand Down Expand Up @@ -2868,7 +2870,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) {
u32 prior_retrans = tp->retrans_out;

tcp_rack_mark_lost(sk);
tcp_rack_mark_lost(sk, ack_time);
if (prior_retrans > tp->retrans_out) {
flag |= FLAG_LOST_RETRANS;
*ack_flag |= FLAG_LOST_RETRANS;
Expand Down Expand Up @@ -3105,11 +3107,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_snd_una, int *acked,
struct tcp_sacktag_state *sack,
struct skb_mstamp *now)
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt;
struct skb_mstamp *now = &sack->ack_time;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
Expand Down Expand Up @@ -3169,7 +3171,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
} else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
tcp_rack_advance(tp, sacked,
&skb->skb_mstamp,
&sack->ack_time);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
Expand Down Expand Up @@ -3599,7 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 lost = tp->lost;
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
struct skb_mstamp now;

sack_state.first_sackt.v64 = 0;
sack_state.rate = &rs;
Expand All @@ -3625,7 +3628,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;

skb_mstamp_get(&now);
skb_mstamp_get(&sack_state.ack_time);

if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
Expand Down Expand Up @@ -3693,11 +3696,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)

/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state, &now);
&sack_state);

if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
Expand All @@ -3712,15 +3716,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, &now, &rs);
tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;

no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
Expand All @@ -3741,9 +3747,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* If data was DSACKed, see if we can undo a cwnd reduction.
*/
if (TCP_SKB_CB(skb)->sacked) {
skb_mstamp_get(&sack_state.ack_time);
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
&sack_state.ack_time);
tcp_xmit_recovery(sk, rexmit);
}

Expand Down
41 changes: 23 additions & 18 deletions net/ipv4/tcp_recovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
* The current version is only used after recovery starts but can be
* easily extended to detect the first loss.
*/
static void tcp_rack_detect_loss(struct sock *sk)
static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
Expand Down Expand Up @@ -62,13 +62,14 @@ static void tcp_rack_detect_loss(struct sock *sk)
continue;

if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {

if (skb_mstamp_us_delta(&tp->rack.mstamp,
&skb->skb_mstamp) <= reo_wnd)
continue;

/* skb is lost if packet sent later is sacked */
tcp_rack_mark_skb_lost(sk, skb);
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
*/
if (skb_mstamp_us_delta(now, &skb->skb_mstamp) >
tp->rack.rtt_us + reo_wnd) {
tcp_rack_mark_skb_lost(sk, skb);
}
} else if (!(scb->sacked & TCPCB_RETRANS)) {
/* Original data are sent sequentially so stop early
* b/c the rest are all sent after rack_sent
Expand All @@ -78,28 +79,33 @@ static void tcp_rack_detect_loss(struct sock *sk)
}
}

void tcp_rack_mark_lost(struct sock *sk)
void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
{
struct tcp_sock *tp = tcp_sk(sk);

if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
return;
/* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk);
tcp_rack_detect_loss(sk, now);
}

/* Record the most recently (re)sent time among the (s)acked packets */
void tcp_rack_advance(struct tcp_sock *tp,
const struct skb_mstamp *xmit_time, u8 sacked)
/* Record the most recently (re)sent time among the (s)acked packets
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time)
{
u32 rtt_us;

if (tp->rack.mstamp.v64 &&
!skb_mstamp_after(xmit_time, &tp->rack.mstamp))
return;

rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
if (sacked & TCPCB_RETRANS) {
struct skb_mstamp now;

/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
* retransmission) was sacked.
Expand All @@ -110,11 +116,10 @@ void tcp_rack_advance(struct tcp_sock *tp,
* so it's at least one RTT (i.e., retransmission is at least
* an RTT later).
*/
skb_mstamp_get(&now);
if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
if (rtt_us < tcp_min_rtt(tp))
return;
}

tp->rack.rtt_us = rtt_us;
tp->rack.mstamp = *xmit_time;
tp->rack.advanced = 1;
}

0 comments on commit deed7be

Please sign in to comment.