Commit | Line | Data |
---|---|---|
a7868ea6 BE |
1 | /* |
2 | * H-TCP congestion control. The algorithm is detailed in: | |
3 | * R.N.Shorten, D.J.Leith: | |
4 | * "H-TCP: TCP for high-speed and long-distance networks" | |
5 | * Proc. PFLDnet, Argonne, 2004. | |
6 | * http://www.hamilton.ie/net/htcp3.pdf | |
7 | */ | |
8 | ||
a7868ea6 BE |
9 | #include <linux/mm.h> |
10 | #include <linux/module.h> | |
11 | #include <net/tcp.h> | |
12 | ||
/*
 * alpha and beta are kept as fixed-point numbers scaled by 2^7, i.e. the
 * integer 128 stands for 1.0.
 */
#define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */
#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
#define BETA_MAX 102 /* 0.8 with shift << 7 */
16 | ||
/*
 * When non-zero, htcp_alpha_update() divides the increase factor by a
 * scale derived from the minimum RTT.
 */
static int use_rtt_scaling = 1;
module_param(use_rtt_scaling, int, 0644);
MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");

/*
 * When non-zero, achieved throughput is tracked per congestion epoch and
 * htcp_beta_update() falls back to BETA_MIN whenever 5*maxB falls outside
 * [4*old_maxB, 6*old_maxB].
 */
static int use_bandwidth_switch = 1;
module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
24 | ||
/*
 * Per-socket H-TCP state, stored in the congestion-control private area
 * returned by inet_csk_ca().  htcp_register() checks that it fits in
 * ICSK_CA_PRIV_SIZE.
 */
struct htcp {
	u16 alpha; /* Fixed point arith, << 7 */
	u8 beta; /* Fixed point arith, << 7 */
	u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */
	u32 last_cong; /* jiffies timestamp of the end of the last congestion event */
	u32 undo_last_cong; /* last_cong saved by htcp_reset() for htcp_cwnd_undo() */
	u16 pkts_acked; /* packets acked by the latest ACK seen in TCP_CA_Open */
	u32 packetcount; /* packets acked since lasttime */
	u32 minRTT; /* smallest smoothed RTT sample seen; 0 means no sample yet */
	u32 maxRTT; /* largest accepted smoothed RTT sample */

	u32 undo_maxRTT; /* maxRTT saved by htcp_reset() */
	u32 undo_old_maxB; /* old_maxB saved by htcp_reset() */

	/* Bandwidth estimation */
	u32 minB; /* lowest throughput estimate in this epoch */
	u32 maxB; /* highest throughput estimate in this epoch */
	u32 old_maxB; /* maxB as of the previous htcp_beta_update() */
	u32 Bi; /* smoothed throughput, packetcount*HZ per elapsed tick */
	u32 lasttime; /* tcp_time_stamp at the start of the current sample */
};
46 | ||
50bf3e22 BE |
47 | static inline u32 htcp_cong_time(struct htcp *ca) |
48 | { | |
49 | return jiffies - ca->last_cong; | |
50 | } | |
51 | ||
52 | static inline u32 htcp_ccount(struct htcp *ca) | |
53 | { | |
54 | return htcp_cong_time(ca)/ca->minRTT; | |
55 | } | |
56 | ||
a7868ea6 BE |
57 | static inline void htcp_reset(struct htcp *ca) |
58 | { | |
50bf3e22 | 59 | ca->undo_last_cong = ca->last_cong; |
a7868ea6 BE |
60 | ca->undo_maxRTT = ca->maxRTT; |
61 | ca->undo_old_maxB = ca->old_maxB; | |
62 | ||
50bf3e22 | 63 | ca->last_cong = jiffies; |
a7868ea6 BE |
64 | } |
65 | ||
6687e988 | 66 | static u32 htcp_cwnd_undo(struct sock *sk) |
a7868ea6 | 67 | { |
6687e988 ACM |
68 | const struct tcp_sock *tp = tcp_sk(sk); |
69 | struct htcp *ca = inet_csk_ca(sk); | |
50bf3e22 | 70 | ca->last_cong = ca->undo_last_cong; |
a7868ea6 BE |
71 | ca->maxRTT = ca->undo_maxRTT; |
72 | ca->old_maxB = ca->undo_old_maxB; | |
73 | return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); | |
74 | } | |
75 | ||
6687e988 | 76 | static inline void measure_rtt(struct sock *sk) |
a7868ea6 | 77 | { |
6687e988 ACM |
78 | const struct inet_connection_sock *icsk = inet_csk(sk); |
79 | const struct tcp_sock *tp = tcp_sk(sk); | |
80 | struct htcp *ca = inet_csk_ca(sk); | |
a7868ea6 BE |
81 | u32 srtt = tp->srtt>>3; |
82 | ||
83 | /* keep track of minimum RTT seen so far, minRTT is zero at first */ | |
84 | if (ca->minRTT > srtt || !ca->minRTT) | |
85 | ca->minRTT = srtt; | |
86 | ||
87 | /* max RTT */ | |
50bf3e22 | 88 | if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) { |
a7868ea6 BE |
89 | if (ca->maxRTT < ca->minRTT) |
90 | ca->maxRTT = ca->minRTT; | |
c33ad6e4 | 91 | if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20)) |
a7868ea6 BE |
92 | ca->maxRTT = srtt; |
93 | } | |
94 | } | |
95 | ||
6687e988 | 96 | static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) |
a7868ea6 | 97 | { |
6687e988 ACM |
98 | const struct inet_connection_sock *icsk = inet_csk(sk); |
99 | const struct tcp_sock *tp = tcp_sk(sk); | |
100 | struct htcp *ca = inet_csk_ca(sk); | |
a7868ea6 BE |
101 | u32 now = tcp_time_stamp; |
102 | ||
0bc6d90b BE |
103 | if (icsk->icsk_ca_state == TCP_CA_Open) |
104 | ca->pkts_acked = pkts_acked; | |
105 | ||
106 | if (!use_bandwidth_switch) | |
107 | return; | |
108 | ||
a7868ea6 | 109 | /* achieved throughput calculations */ |
6687e988 ACM |
110 | if (icsk->icsk_ca_state != TCP_CA_Open && |
111 | icsk->icsk_ca_state != TCP_CA_Disorder) { | |
a7868ea6 BE |
112 | ca->packetcount = 0; |
113 | ca->lasttime = now; | |
114 | return; | |
115 | } | |
116 | ||
117 | ca->packetcount += pkts_acked; | |
118 | ||
119 | if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1) | |
120 | && now - ca->lasttime >= ca->minRTT | |
121 | && ca->minRTT > 0) { | |
122 | __u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime); | |
50bf3e22 | 123 | if (htcp_ccount(ca) <= 3) { |
a7868ea6 BE |
124 | /* just after backoff */ |
125 | ca->minB = ca->maxB = ca->Bi = cur_Bi; | |
126 | } else { | |
127 | ca->Bi = (3*ca->Bi + cur_Bi)/4; | |
128 | if (ca->Bi > ca->maxB) | |
129 | ca->maxB = ca->Bi; | |
130 | if (ca->minB > ca->maxB) | |
131 | ca->minB = ca->maxB; | |
132 | } | |
133 | ca->packetcount = 0; | |
134 | ca->lasttime = now; | |
135 | } | |
136 | } | |
137 | ||
138 | static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT) | |
139 | { | |
140 | if (use_bandwidth_switch) { | |
141 | u32 maxB = ca->maxB; | |
142 | u32 old_maxB = ca->old_maxB; | |
143 | ca->old_maxB = ca->maxB; | |
144 | ||
145 | if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) { | |
146 | ca->beta = BETA_MIN; | |
147 | ca->modeswitch = 0; | |
148 | return; | |
149 | } | |
150 | } | |
151 | ||
c33ad6e4 | 152 | if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) { |
a7868ea6 BE |
153 | ca->beta = (minRTT<<7)/maxRTT; |
154 | if (ca->beta < BETA_MIN) | |
155 | ca->beta = BETA_MIN; | |
156 | else if (ca->beta > BETA_MAX) | |
157 | ca->beta = BETA_MAX; | |
158 | } else { | |
159 | ca->beta = BETA_MIN; | |
160 | ca->modeswitch = 1; | |
161 | } | |
162 | } | |
163 | ||
164 | static inline void htcp_alpha_update(struct htcp *ca) | |
165 | { | |
166 | u32 minRTT = ca->minRTT; | |
167 | u32 factor = 1; | |
50bf3e22 | 168 | u32 diff = htcp_cong_time(ca); |
a7868ea6 BE |
169 | |
170 | if (diff > HZ) { | |
171 | diff -= HZ; | |
172 | factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ; | |
173 | } | |
174 | ||
175 | if (use_rtt_scaling && minRTT) { | |
176 | u32 scale = (HZ<<3)/(10*minRTT); | |
177 | scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */ | |
178 | factor = (factor<<3)/scale; | |
179 | if (!factor) | |
180 | factor = 1; | |
181 | } | |
182 | ||
183 | ca->alpha = 2*factor*((1<<7)-ca->beta); | |
184 | if (!ca->alpha) | |
185 | ca->alpha = ALPHA_BASE; | |
186 | } | |
187 | ||
188 | /* After we have the rtt data to calculate beta, we'd still prefer to wait one | |
189 | * rtt before we adjust our beta to ensure we are working from a consistent | |
190 | * data. | |
191 | * | |
192 | * This function should be called when we hit a congestion event since only at | |
193 | * that point do we really have a real sense of maxRTT (the queues en route | |
194 | * were getting just too full now). | |
195 | */ | |
6687e988 | 196 | static void htcp_param_update(struct sock *sk) |
a7868ea6 | 197 | { |
6687e988 | 198 | struct htcp *ca = inet_csk_ca(sk); |
a7868ea6 BE |
199 | u32 minRTT = ca->minRTT; |
200 | u32 maxRTT = ca->maxRTT; | |
201 | ||
202 | htcp_beta_update(ca, minRTT, maxRTT); | |
203 | htcp_alpha_update(ca); | |
204 | ||
205 | /* add slowly fading memory for maxRTT to accommodate routing changes etc */ | |
206 | if (minRTT > 0 && maxRTT > minRTT) | |
207 | ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; | |
208 | } | |
209 | ||
6687e988 | 210 | static u32 htcp_recalc_ssthresh(struct sock *sk) |
a7868ea6 | 211 | { |
6687e988 ACM |
212 | const struct tcp_sock *tp = tcp_sk(sk); |
213 | const struct htcp *ca = inet_csk_ca(sk); | |
214 | htcp_param_update(sk); | |
a7868ea6 BE |
215 | return max((tp->snd_cwnd * ca->beta) >> 7, 2U); |
216 | } | |
217 | ||
6687e988 | 218 | static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
a7868ea6 BE |
219 | u32 in_flight, int data_acked) |
220 | { | |
6687e988 ACM |
221 | struct tcp_sock *tp = tcp_sk(sk); |
222 | struct htcp *ca = inet_csk_ca(sk); | |
a7868ea6 | 223 | |
f4805ede | 224 | if (!tcp_is_cwnd_limited(sk, in_flight)) |
a7868ea6 BE |
225 | return; |
226 | ||
7faffa1c SH |
227 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
228 | tcp_slow_start(tp); | |
229 | else { | |
230 | ||
6687e988 | 231 | measure_rtt(sk); |
a7868ea6 | 232 | |
7faffa1c | 233 | /* In dangerous area, increase slowly. |
a7868ea6 BE |
234 | * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd |
235 | */ | |
0bc6d90b | 236 | if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) { |
a7868ea6 BE |
237 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
238 | tp->snd_cwnd++; | |
239 | tp->snd_cwnd_cnt = 0; | |
50bf3e22 | 240 | htcp_alpha_update(ca); |
0bc6d90b BE |
241 | } else |
242 | tp->snd_cwnd_cnt += ca->pkts_acked; | |
243 | ||
244 | ca->pkts_acked = 1; | |
a7868ea6 BE |
245 | } |
246 | } | |
247 | ||
6687e988 | 248 | static void htcp_init(struct sock *sk) |
a7868ea6 | 249 | { |
6687e988 | 250 | struct htcp *ca = inet_csk_ca(sk); |
a7868ea6 BE |
251 | |
252 | memset(ca, 0, sizeof(struct htcp)); | |
253 | ca->alpha = ALPHA_BASE; | |
254 | ca->beta = BETA_MIN; | |
0bc6d90b | 255 | ca->pkts_acked = 1; |
50bf3e22 | 256 | ca->last_cong = jiffies; |
a7868ea6 BE |
257 | } |
258 | ||
6687e988 | 259 | static void htcp_state(struct sock *sk, u8 new_state) |
a7868ea6 BE |
260 | { |
261 | switch (new_state) { | |
50bf3e22 BE |
262 | case TCP_CA_Open: |
263 | { | |
264 | struct htcp *ca = inet_csk_ca(sk); | |
265 | ca->last_cong = jiffies; | |
266 | } | |
267 | break; | |
a7868ea6 BE |
268 | case TCP_CA_CWR: |
269 | case TCP_CA_Recovery: | |
270 | case TCP_CA_Loss: | |
6687e988 | 271 | htcp_reset(inet_csk_ca(sk)); |
a7868ea6 BE |
272 | break; |
273 | } | |
274 | } | |
275 | ||
276 | static struct tcp_congestion_ops htcp = { | |
277 | .init = htcp_init, | |
278 | .ssthresh = htcp_recalc_ssthresh, | |
a7868ea6 BE |
279 | .cong_avoid = htcp_cong_avoid, |
280 | .set_state = htcp_state, | |
281 | .undo_cwnd = htcp_cwnd_undo, | |
282 | .pkts_acked = measure_achieved_throughput, | |
283 | .owner = THIS_MODULE, | |
284 | .name = "htcp", | |
285 | }; | |
286 | ||
287 | static int __init htcp_register(void) | |
288 | { | |
6687e988 | 289 | BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); |
a7868ea6 | 290 | BUILD_BUG_ON(BETA_MIN >= BETA_MAX); |
a7868ea6 BE |
291 | return tcp_register_congestion_control(&htcp); |
292 | } | |
293 | ||
294 | static void __exit htcp_unregister(void) | |
295 | { | |
296 | tcp_unregister_congestion_control(&htcp); | |
297 | } | |
298 | ||
/* Wire the register/unregister functions to module load and unload. */
module_init(htcp_register);
module_exit(htcp_unregister);

/* Module metadata. */
MODULE_AUTHOR("Baruch Even");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("H-TCP");