diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 2a6c0dd665a4..e0a2ca7456ff 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -54,6 +54,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 loss_cwnd; + struct tcp_plb_state plb; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -91,6 +92,8 @@ static void dctcp_init(struct sock *sk) ca->ce_state = 0; dctcp_reset(tp, ca); + tcp_plb_init(sk, &ca->plb); + return; } @@ -117,14 +120,28 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { + u32 delivered = tp->delivered - ca->old_delivered; u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; + u32 ce_ratio = 0; + + if (delivered > 0) { + /* dctcp_alpha keeps EWMA of fraction of ECN marked + * packets. Because of EWMA smoothing, PLB reaction can + * be slow so we use ce_ratio which is an instantaneous + * measure of congestion. ce_ratio is the fraction of + * ECN marked packets in the previous RTT. + */ + if (delivered_ce > 0) + ce_ratio = (delivered_ce << TCP_PLB_SCALE) / delivered; + tcp_plb_update_state(sk, &ca->plb, (int)ce_ratio); + tcp_plb_check_rehash(sk, &ca->plb); + } /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); if (delivered_ce) { - u32 delivered = tp->delivered - ca->old_delivered; /* If dctcp_shift_g == 1, a 32bit value would overflow * after 8 M packets. @@ -172,8 +189,12 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; case CA_EVENT_LOSS: + tcp_plb_update_state_upon_rto(sk, &ca->plb); dctcp_react_to_loss(sk); break; + case CA_EVENT_TX_START: + tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ + break; default: /* Don't care for the rest. */ break;