Commit | Line | Data |
---|---|---|
145ca25e PZ |
1 | /* |
2 | * Floating proportions | |
3 | * | |
4 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | |
5 | * | |
6 | * Description: | |
7 | * | |
8 | * The floating proportion is a time derivative with an exponentially decaying | |
9 | * history: | |
10 | * | |
11 | * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) | |
12 | * | |
13 | * Where j is an element from {prop_local}, x_{j} is j's number of events, | |
14 | * and i the time period over which the differential is taken. So d/dt_{-i} is | |
15 | * the differential over the i-th last period. | |
16 | * | |
17 | * The decaying history gives smooth transitions. The time differential carries | |
18 | * the notion of speed. | |
19 | * | |
20 | * The denominator is 2^(1+i) because we want the series to be normalised, ie. | |
21 | * | |
22 | * \Sum_{i=0} 1/2^(1+i) = 1 | |
23 | * | |
24 | * Further more, if we measure time (t) in the same events as x; so that: | |
25 | * | |
26 | * t = \Sum_{j} x_{j} | |
27 | * | |
28 | * we get that: | |
29 | * | |
30 | * \Sum_{j} p_{j} = 1 | |
31 | * | |
32 | * Writing this in an iterative fashion we get (dropping the 'd's): | |
33 | * | |
34 | * if (++x_{j}, ++t > period) | |
35 | * t /= 2; | |
36 | * for_each (j) | |
37 | * x_{j} /= 2; | |
38 | * | |
39 | * so that: | |
40 | * | |
41 | * p_{j} = x_{j} / t; | |
42 | * | |
43 | * We optimize away the '/= 2' for the global time delta by noting that: | |
44 | * | |
45 | * if (++t > period) t /= 2: | |
46 | * | |
47 | * Can be approximated by: | |
48 | * | |
49 | * period/2 + (++t % period/2) | |
50 | * | |
51 | * [ Furthermore, when we choose period to be 2^n it can be written in terms of | |
52 | * binary operations and wraparound artefacts disappear. ] | |
53 | * | |
54 | * Also note that this yields a natural counter of the elapsed periods: | |
55 | * | |
56 | * c = t / (period/2) | |
57 | * | |
58 | * [ Its monotonic increasing property can be applied to mitigate the wrap- | |
59 | * around issue. ] | |
60 | * | |
61 | * This allows us to do away with the loop over all prop_locals on each period | |
62 | * expiration. By remembering the period count under which it was last accessed | |
63 | * as c_{j}, we can obtain the number of 'missed' cycles from: | |
64 | * | |
65 | * c - c_{j} | |
66 | * | |
67 | * We can then lazily catch up to the global period count every time we are | |
68 | * going to use x_{j}, by doing: | |
69 | * | |
70 | * x_{j} /= 2^(c - c_{j}), c_{j} = c | |
71 | */ | |
72 | ||
73 | #include <linux/proportions.h> | |
74 | #include <linux/rcupdate.h> | |
75 | ||
145ca25e PZ |
76 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) |
77 | { | |
78 | int err; | |
79 | ||
80 | if (shift > PROP_MAX_SHIFT) | |
81 | shift = PROP_MAX_SHIFT; | |
82 | ||
83 | pd->index = 0; | |
84 | pd->pg[0].shift = shift; | |
85 | mutex_init(&pd->mutex); | |
86 | err = percpu_counter_init_irq(&pd->pg[0].events, 0); | |
87 | if (err) | |
88 | goto out; | |
89 | ||
90 | err = percpu_counter_init_irq(&pd->pg[1].events, 0); | |
91 | if (err) | |
92 | percpu_counter_destroy(&pd->pg[0].events); | |
93 | ||
94 | out: | |
95 | return err; | |
96 | } | |
97 | ||
98 | /* | |
99 | * We have two copies, and flip between them to make it seem like an atomic | |
100 | * update. The update is not really atomic wrt the events counter, but | |
101 | * it is internally consistent with the bit layout depending on shift. | |
102 | * | |
103 | * We copy the events count, move the bits around and flip the index. | |
104 | */ | |
105 | void prop_change_shift(struct prop_descriptor *pd, int shift) | |
106 | { | |
107 | int index; | |
108 | int offset; | |
109 | u64 events; | |
110 | unsigned long flags; | |
111 | ||
112 | if (shift > PROP_MAX_SHIFT) | |
113 | shift = PROP_MAX_SHIFT; | |
114 | ||
115 | mutex_lock(&pd->mutex); | |
116 | ||
117 | index = pd->index ^ 1; | |
118 | offset = pd->pg[pd->index].shift - shift; | |
119 | if (!offset) | |
120 | goto out; | |
121 | ||
122 | pd->pg[index].shift = shift; | |
123 | ||
124 | local_irq_save(flags); | |
125 | events = percpu_counter_sum(&pd->pg[pd->index].events); | |
126 | if (offset < 0) | |
127 | events <<= -offset; | |
128 | else | |
129 | events >>= offset; | |
130 | percpu_counter_set(&pd->pg[index].events, events); | |
131 | ||
132 | /* | |
133 | * ensure the new pg is fully written before the switch | |
134 | */ | |
135 | smp_wmb(); | |
136 | pd->index = index; | |
137 | local_irq_restore(flags); | |
138 | ||
139 | synchronize_rcu(); | |
140 | ||
141 | out: | |
142 | mutex_unlock(&pd->mutex); | |
143 | } | |
144 | ||
145 | /* | |
146 | * wrap the access to the data in an rcu_read_lock() section; | |
147 | * this is used to track the active references. | |
148 | */ | |
149 | static struct prop_global *prop_get_global(struct prop_descriptor *pd) | |
150 | { | |
151 | int index; | |
152 | ||
153 | rcu_read_lock(); | |
154 | index = pd->index; | |
155 | /* | |
156 | * match the wmb from vcd_flip() | |
157 | */ | |
158 | smp_rmb(); | |
159 | return &pd->pg[index]; | |
160 | } | |
161 | ||
162 | static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) | |
163 | { | |
164 | rcu_read_unlock(); | |
165 | } | |
166 | ||
167 | static void | |
168 | prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) | |
169 | { | |
170 | int offset = *pl_shift - new_shift; | |
171 | ||
172 | if (!offset) | |
173 | return; | |
174 | ||
175 | if (offset < 0) | |
176 | *pl_period <<= -offset; | |
177 | else | |
178 | *pl_period >>= offset; | |
179 | ||
180 | *pl_shift = new_shift; | |
181 | } | |
182 | ||
183 | /* | |
184 | * PERCPU | |
185 | */ | |
186 | ||
f16b34aa PZ |
187 | #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) |
188 | ||
145ca25e PZ |
189 | int prop_local_init_percpu(struct prop_local_percpu *pl) |
190 | { | |
191 | spin_lock_init(&pl->lock); | |
192 | pl->shift = 0; | |
193 | pl->period = 0; | |
194 | return percpu_counter_init_irq(&pl->events, 0); | |
195 | } | |
196 | ||
197 | void prop_local_destroy_percpu(struct prop_local_percpu *pl) | |
198 | { | |
199 | percpu_counter_destroy(&pl->events); | |
200 | } | |
201 | ||
202 | /* | |
203 | * Catch up with missed period expirations. | |
204 | * | |
205 | * until (c_{j} == c) | |
206 | * x_{j} -= x_{j}/2; | |
207 | * c_{j}++; | |
208 | */ | |
209 | static | |
210 | void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) | |
211 | { | |
212 | unsigned long period = 1UL << (pg->shift - 1); | |
213 | unsigned long period_mask = ~(period - 1); | |
214 | unsigned long global_period; | |
215 | unsigned long flags; | |
216 | ||
217 | global_period = percpu_counter_read(&pg->events); | |
218 | global_period &= period_mask; | |
219 | ||
220 | /* | |
221 | * Fast path - check if the local and global period count still match | |
222 | * outside of the lock. | |
223 | */ | |
224 | if (pl->period == global_period) | |
225 | return; | |
226 | ||
227 | spin_lock_irqsave(&pl->lock, flags); | |
228 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | |
f16b34aa | 229 | |
145ca25e PZ |
230 | /* |
231 | * For each missed period, we half the local counter. | |
232 | * basically: | |
233 | * pl->events >> (global_period - pl->period); | |
145ca25e | 234 | */ |
f16b34aa PZ |
235 | period = (global_period - pl->period) >> (pg->shift - 1); |
236 | if (period < BITS_PER_LONG) { | |
237 | s64 val = percpu_counter_read(&pl->events); | |
238 | ||
239 | if (val < (nr_cpu_ids * PROP_BATCH)) | |
240 | val = percpu_counter_sum(&pl->events); | |
241 | ||
242 | __percpu_counter_add(&pl->events, -val + (val >> period), | |
243 | PROP_BATCH); | |
244 | } else | |
245 | percpu_counter_set(&pl->events, 0); | |
246 | ||
145ca25e PZ |
247 | pl->period = global_period; |
248 | spin_unlock_irqrestore(&pl->lock, flags); | |
249 | } | |
250 | ||
251 | /* | |
252 | * ++x_{j}, ++t | |
253 | */ | |
254 | void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |
255 | { | |
256 | struct prop_global *pg = prop_get_global(pd); | |
257 | ||
258 | prop_norm_percpu(pg, pl); | |
f16b34aa | 259 | __percpu_counter_add(&pl->events, 1, PROP_BATCH); |
145ca25e PZ |
260 | percpu_counter_add(&pg->events, 1); |
261 | prop_put_global(pd, pg); | |
262 | } | |
263 | ||
a42dde04 PZ |
264 | /* |
265 | * identical to __prop_inc_percpu, except that it limits this pl's fraction to | |
266 | * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. | |
267 | */ | |
268 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | |
269 | struct prop_local_percpu *pl, long frac) | |
270 | { | |
271 | struct prop_global *pg = prop_get_global(pd); | |
272 | ||
273 | prop_norm_percpu(pg, pl); | |
274 | ||
275 | if (unlikely(frac != PROP_FRAC_BASE)) { | |
276 | unsigned long period_2 = 1UL << (pg->shift - 1); | |
277 | unsigned long counter_mask = period_2 - 1; | |
278 | unsigned long global_count; | |
279 | long numerator, denominator; | |
280 | ||
281 | numerator = percpu_counter_read_positive(&pl->events); | |
282 | global_count = percpu_counter_read(&pg->events); | |
283 | denominator = period_2 + (global_count & counter_mask); | |
284 | ||
285 | if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) | |
286 | goto out_put; | |
287 | } | |
288 | ||
289 | percpu_counter_add(&pl->events, 1); | |
290 | percpu_counter_add(&pg->events, 1); | |
291 | ||
292 | out_put: | |
293 | prop_put_global(pd, pg); | |
294 | } | |
295 | ||
145ca25e PZ |
296 | /* |
297 | * Obtain a fraction of this proportion | |
298 | * | |
299 | * p_{j} = x_{j} / (period/2 + t % period/2) | |
300 | */ | |
301 | void prop_fraction_percpu(struct prop_descriptor *pd, | |
302 | struct prop_local_percpu *pl, | |
303 | long *numerator, long *denominator) | |
304 | { | |
305 | struct prop_global *pg = prop_get_global(pd); | |
306 | unsigned long period_2 = 1UL << (pg->shift - 1); | |
307 | unsigned long counter_mask = period_2 - 1; | |
308 | unsigned long global_count; | |
309 | ||
310 | prop_norm_percpu(pg, pl); | |
311 | *numerator = percpu_counter_read_positive(&pl->events); | |
312 | ||
313 | global_count = percpu_counter_read(&pg->events); | |
314 | *denominator = period_2 + (global_count & counter_mask); | |
315 | ||
316 | prop_put_global(pd, pg); | |
317 | } | |
318 | ||
319 | /* | |
320 | * SINGLE | |
321 | */ | |
322 | ||
323 | int prop_local_init_single(struct prop_local_single *pl) | |
324 | { | |
325 | spin_lock_init(&pl->lock); | |
326 | pl->shift = 0; | |
327 | pl->period = 0; | |
328 | pl->events = 0; | |
329 | return 0; | |
330 | } | |
331 | ||
332 | void prop_local_destroy_single(struct prop_local_single *pl) | |
333 | { | |
334 | } | |
335 | ||
336 | /* | |
337 | * Catch up with missed period expirations. | |
338 | */ | |
339 | static | |
340 | void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) | |
341 | { | |
342 | unsigned long period = 1UL << (pg->shift - 1); | |
343 | unsigned long period_mask = ~(period - 1); | |
344 | unsigned long global_period; | |
345 | unsigned long flags; | |
346 | ||
347 | global_period = percpu_counter_read(&pg->events); | |
348 | global_period &= period_mask; | |
349 | ||
350 | /* | |
351 | * Fast path - check if the local and global period count still match | |
352 | * outside of the lock. | |
353 | */ | |
354 | if (pl->period == global_period) | |
355 | return; | |
356 | ||
357 | spin_lock_irqsave(&pl->lock, flags); | |
358 | prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | |
359 | /* | |
360 | * For each missed period, we half the local counter. | |
361 | */ | |
362 | period = (global_period - pl->period) >> (pg->shift - 1); | |
363 | if (likely(period < BITS_PER_LONG)) | |
364 | pl->events >>= period; | |
365 | else | |
366 | pl->events = 0; | |
367 | pl->period = global_period; | |
368 | spin_unlock_irqrestore(&pl->lock, flags); | |
369 | } | |
370 | ||
371 | /* | |
372 | * ++x_{j}, ++t | |
373 | */ | |
374 | void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) | |
375 | { | |
376 | struct prop_global *pg = prop_get_global(pd); | |
377 | ||
378 | prop_norm_single(pg, pl); | |
379 | pl->events++; | |
380 | percpu_counter_add(&pg->events, 1); | |
381 | prop_put_global(pd, pg); | |
382 | } | |
383 | ||
384 | /* | |
385 | * Obtain a fraction of this proportion | |
386 | * | |
387 | * p_{j} = x_{j} / (period/2 + t % period/2) | |
388 | */ | |
389 | void prop_fraction_single(struct prop_descriptor *pd, | |
390 | struct prop_local_single *pl, | |
391 | long *numerator, long *denominator) | |
392 | { | |
393 | struct prop_global *pg = prop_get_global(pd); | |
394 | unsigned long period_2 = 1UL << (pg->shift - 1); | |
395 | unsigned long counter_mask = period_2 - 1; | |
396 | unsigned long global_count; | |
397 | ||
398 | prop_norm_single(pg, pl); | |
399 | *numerator = pl->events; | |
400 | ||
401 | global_count = percpu_counter_read(&pg->events); | |
402 | *denominator = period_2 + (global_count & counter_mask); | |
403 | ||
404 | prop_put_global(pd, pg); | |
405 | } |