mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
dccp ccid-2: Perform congestion-window validation
CCID-2's cwnd increases like TCP during slow-start, which has implications for
 * the local Sequence Window value (should be > cwnd),
 * the Ack Ratio value.
Hence an exponential growth, if it does not reflect the actual network
conditions, can quickly lead to instability.

This patch adds congestion-window validation (RFC2861) to CCID-2:
 * cwnd is constrained if the sender is application limited;
 * cwnd is reduced after a long idle period, as suggested in the '90 paper
   by Van Jacobson, in RFC 2581 (sec. 4.1);
 * cwnd is never reduced below the RFC 3390 initial window.

As marked in the comments, the code is actually almost a direct copy of the
TCP congestion-window-validation algorithms. By continuing this work, it may
in future be possible to use the TCP code (not possible at the moment).

The mechanism can be turned off using a module parameter. Sampling of the
currently-used window (moving-maximum) is however done constantly; this is
used to determine the expected window, which can be exploited to regulate
DCCP's Sequence Window value.

This patch also sets slow-start-after-idle (RFC 4341, 5.1), i.e. it behaves like
TCP when net.ipv4.tcp_slow_start_after_idle = 1.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
This commit is contained in:
parent
58fdea0f31
commit
113ced1f52
@ -153,17 +153,93 @@ out:
|
|||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Congestion window validation (RFC 2861).
|
||||||
|
*/
|
||||||
|
static int ccid2_do_cwv = 1;
|
||||||
|
module_param(ccid2_do_cwv, bool, 0644);
|
||||||
|
MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ccid2_update_used_window - Track how much of cwnd is actually used
|
||||||
|
* This is done in addition to CWV. The sender needs to have an idea of how many
|
||||||
|
* packets may be in flight, to set the local Sequence Window value accordingly
|
||||||
|
* (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
|
||||||
|
* maximum-used window. We use an EWMA low-pass filter to filter out noise.
|
||||||
|
*/
|
||||||
|
static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
|
||||||
|
{
|
||||||
|
hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This borrows the code of tcp_cwnd_application_limited() */
|
||||||
|
static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
|
||||||
|
{
|
||||||
|
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
|
||||||
|
/* don't reduce cwnd below the initial window (IW) */
|
||||||
|
u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
|
||||||
|
win_used = max(hc->tx_cwnd_used, init_win);
|
||||||
|
|
||||||
|
if (win_used < hc->tx_cwnd) {
|
||||||
|
hc->tx_ssthresh = max(hc->tx_ssthresh,
|
||||||
|
(hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
|
||||||
|
hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
|
||||||
|
}
|
||||||
|
hc->tx_cwnd_used = 0;
|
||||||
|
hc->tx_cwnd_stamp = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This borrows the code of tcp_cwnd_restart() */
|
||||||
|
static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
|
||||||
|
{
|
||||||
|
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
|
||||||
|
u32 cwnd = hc->tx_cwnd, restart_cwnd,
|
||||||
|
iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
|
||||||
|
|
||||||
|
hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
|
||||||
|
|
||||||
|
/* don't reduce cwnd below the initial window (IW) */
|
||||||
|
restart_cwnd = min(cwnd, iwnd);
|
||||||
|
cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
|
||||||
|
hc->tx_cwnd = max(cwnd, restart_cwnd);
|
||||||
|
|
||||||
|
hc->tx_cwnd_stamp = now;
|
||||||
|
hc->tx_cwnd_used = 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||||
{
|
{
|
||||||
struct dccp_sock *dp = dccp_sk(sk);
|
struct dccp_sock *dp = dccp_sk(sk);
|
||||||
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
|
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
|
||||||
|
const u32 now = ccid2_time_stamp;
|
||||||
struct ccid2_seq *next;
|
struct ccid2_seq *next;
|
||||||
|
|
||||||
hc->tx_pipe++;
|
/* slow-start after idle periods (RFC 2581, RFC 2861) */
|
||||||
|
if (ccid2_do_cwv && !hc->tx_pipe &&
|
||||||
|
(s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
|
||||||
|
ccid2_cwnd_restart(sk, now);
|
||||||
|
|
||||||
|
hc->tx_lsndtime = now;
|
||||||
|
hc->tx_pipe += 1;
|
||||||
|
|
||||||
|
/* see whether cwnd was fully used (RFC 2861), update expected window */
|
||||||
|
if (ccid2_cwnd_network_limited(hc)) {
|
||||||
|
ccid2_update_used_window(hc, hc->tx_cwnd);
|
||||||
|
hc->tx_cwnd_used = 0;
|
||||||
|
hc->tx_cwnd_stamp = now;
|
||||||
|
} else {
|
||||||
|
if (hc->tx_pipe > hc->tx_cwnd_used)
|
||||||
|
hc->tx_cwnd_used = hc->tx_pipe;
|
||||||
|
|
||||||
|
ccid2_update_used_window(hc, hc->tx_cwnd_used);
|
||||||
|
|
||||||
|
if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
|
||||||
|
ccid2_cwnd_application_limited(sk, now);
|
||||||
|
}
|
||||||
|
|
||||||
hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
|
hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
|
||||||
hc->tx_seqh->ccid2s_acked = 0;
|
hc->tx_seqh->ccid2s_acked = 0;
|
||||||
hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
|
hc->tx_seqh->ccid2s_sent = now;
|
||||||
|
|
||||||
next = hc->tx_seqh->ccid2s_next;
|
next = hc->tx_seqh->ccid2s_next;
|
||||||
/* check if we need to alloc more space */
|
/* check if we need to alloc more space */
|
||||||
@ -594,6 +670,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
|||||||
|
|
||||||
/* Use larger initial windows (RFC 4341, section 5). */
|
/* Use larger initial windows (RFC 4341, section 5). */
|
||||||
hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
|
hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
|
||||||
|
hc->tx_expected_wnd = hc->tx_cwnd;
|
||||||
|
|
||||||
/* Make sure that Ack Ratio is enabled and within bounds. */
|
/* Make sure that Ack Ratio is enabled and within bounds. */
|
||||||
max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
|
max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
|
||||||
@ -606,7 +683,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
|||||||
|
|
||||||
hc->tx_rto = DCCP_TIMEOUT_INIT;
|
hc->tx_rto = DCCP_TIMEOUT_INIT;
|
||||||
hc->tx_rpdupack = -1;
|
hc->tx_rpdupack = -1;
|
||||||
hc->tx_last_cong = ccid2_time_stamp;
|
hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
|
||||||
|
hc->tx_cwnd_used = 0;
|
||||||
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
|
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
|
||||||
(unsigned long)sk);
|
(unsigned long)sk);
|
||||||
INIT_LIST_HEAD(&hc->tx_av_chunks);
|
INIT_LIST_HEAD(&hc->tx_av_chunks);
|
||||||
|
@ -53,6 +53,10 @@ struct ccid2_seq {
|
|||||||
* @tx_rttvar: moving average/maximum of @mdev_max
|
* @tx_rttvar: moving average/maximum of @mdev_max
|
||||||
* @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
|
* @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
|
||||||
* @tx_rtt_seq: to decay RTTVAR at most once per flight
|
* @tx_rtt_seq: to decay RTTVAR at most once per flight
|
||||||
|
* @tx_cwnd_used: actually used cwnd, W_used of RFC 2861
|
||||||
|
* @tx_expected_wnd: moving average of @tx_cwnd_used
|
||||||
|
* @tx_cwnd_stamp: to track idle periods in CWV
|
||||||
|
* @tx_lsndtime: last time (in jiffies) a data packet was sent
|
||||||
* @tx_rpseq: last consecutive seqno
|
* @tx_rpseq: last consecutive seqno
|
||||||
* @tx_rpdupack: dupacks since rpseq
|
* @tx_rpdupack: dupacks since rpseq
|
||||||
* @tx_av_chunks: list of Ack Vectors received on current skb
|
* @tx_av_chunks: list of Ack Vectors received on current skb
|
||||||
@ -76,6 +80,12 @@ struct ccid2_hc_tx_sock {
|
|||||||
u64 tx_rtt_seq:48;
|
u64 tx_rtt_seq:48;
|
||||||
struct timer_list tx_rtotimer;
|
struct timer_list tx_rtotimer;
|
||||||
|
|
||||||
|
/* Congestion Window validation (optional, RFC 2861) */
|
||||||
|
u32 tx_cwnd_used,
|
||||||
|
tx_expected_wnd,
|
||||||
|
tx_cwnd_stamp,
|
||||||
|
tx_lsndtime;
|
||||||
|
|
||||||
u64 tx_rpseq;
|
u64 tx_rpseq;
|
||||||
int tx_rpdupack;
|
int tx_rpdupack;
|
||||||
u32 tx_last_cong;
|
u32 tx_last_cong;
|
||||||
|
Loading…
Reference in New Issue
Block a user