Commit | Line | Data |
---|---|---|
31e4c28d VG |
1 | /* |
2 | * Common Block IO controller cgroup interface | |
3 | * | |
4 | * Based on ideas and code from CFQ, CFS and BFQ: | |
5 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | |
6 | * | |
7 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | |
8 | * Paolo Valente <paolo.valente@unimore.it> | |
9 | * | |
10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | |
11 | * Nauman Rafique <nauman@google.com> | |
12 | */ | |
13 | #include <linux/ioprio.h> | |
22084190 VG |
14 | #include <linux/seq_file.h> |
15 | #include <linux/kdev_t.h> | |
9d6a986c | 16 | #include <linux/module.h> |
accee785 | 17 | #include <linux/err.h> |
9195291e | 18 | #include <linux/blkdev.h> |
31e4c28d | 19 | #include "blk-cgroup.h" |
3e252066 VG |
20 | |
/*
 * List of registered IO control policies. Every walk and every
 * modification of the list in this file happens under blkio_list_lock.
 */
static LIST_HEAD(blkio_list);
static DEFINE_SPINLOCK(blkio_list_lock);
b1c35769 | 23 | |
31e4c28d | 24 | struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; |
9d6a986c VG |
25 | EXPORT_SYMBOL_GPL(blkio_root_cgroup); |
26 | ||
67523c48 BB |
27 | static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, |
28 | struct cgroup *); | |
29 | static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, | |
30 | struct task_struct *, bool); | |
31 | static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, | |
32 | struct cgroup *, struct task_struct *, bool); | |
33 | static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); | |
34 | static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); | |
35 | ||
36 | struct cgroup_subsys blkio_subsys = { | |
37 | .name = "blkio", | |
38 | .create = blkiocg_create, | |
39 | .can_attach = blkiocg_can_attach, | |
40 | .attach = blkiocg_attach, | |
41 | .destroy = blkiocg_destroy, | |
42 | .populate = blkiocg_populate, | |
43 | #ifdef CONFIG_BLK_CGROUP | |
44 | /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */ | |
45 | .subsys_id = blkio_subsys_id, | |
46 | #endif | |
47 | .use_id = 1, | |
48 | .module = THIS_MODULE, | |
49 | }; | |
50 | EXPORT_SYMBOL_GPL(blkio_subsys); | |
51 | ||
31e4c28d VG |
52 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) |
53 | { | |
54 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | |
55 | struct blkio_cgroup, css); | |
56 | } | |
9d6a986c | 57 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); |
31e4c28d | 58 | |
9195291e DS |
59 | /* |
60 | * Add to the appropriate stat variable depending on the request type. | |
61 | * This should be called with the blkg->stats_lock held. | |
62 | */ | |
63 | void io_add_stat(uint64_t *stat, uint64_t add, unsigned int flags) | |
64 | { | |
65 | if (flags & REQ_RW) | |
66 | stat[IO_WRITE] += add; | |
67 | else | |
68 | stat[IO_READ] += add; | |
69 | /* | |
70 | * Everywhere in the block layer, an IO is treated as sync if it is a | |
71 | * read or a SYNC write. We follow the same norm. | |
72 | */ | |
73 | if (!(flags & REQ_RW) || flags & REQ_RW_SYNC) | |
74 | stat[IO_SYNC] += add; | |
75 | else | |
76 | stat[IO_ASYNC] += add; | |
77 | } | |
78 | ||
303a3acb | 79 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) |
22084190 | 80 | { |
303a3acb DS |
81 | unsigned long flags; |
82 | ||
83 | spin_lock_irqsave(&blkg->stats_lock, flags); | |
84 | blkg->stats.time += time; | |
85 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | |
22084190 | 86 | } |
303a3acb | 87 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); |
22084190 | 88 | |
9195291e DS |
89 | void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg, |
90 | struct request *rq) | |
91 | { | |
92 | struct blkio_group_stats *stats; | |
93 | unsigned long flags; | |
94 | ||
95 | spin_lock_irqsave(&blkg->stats_lock, flags); | |
96 | stats = &blkg->stats; | |
97 | stats->sectors += blk_rq_sectors(rq); | |
98 | io_add_stat(stats->io_serviced, 1, rq->cmd_flags); | |
99 | io_add_stat(stats->io_service_bytes, blk_rq_sectors(rq) << 9, | |
100 | rq->cmd_flags); | |
101 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | |
102 | } | |
103 | ||
104 | void blkiocg_update_request_completion_stats(struct blkio_group *blkg, | |
105 | struct request *rq) | |
106 | { | |
107 | struct blkio_group_stats *stats; | |
108 | unsigned long flags; | |
109 | unsigned long long now = sched_clock(); | |
110 | ||
111 | spin_lock_irqsave(&blkg->stats_lock, flags); | |
112 | stats = &blkg->stats; | |
113 | if (time_after64(now, rq->io_start_time_ns)) | |
114 | io_add_stat(stats->io_service_time, now - rq->io_start_time_ns, | |
115 | rq->cmd_flags); | |
116 | if (time_after64(rq->io_start_time_ns, rq->start_time_ns)) | |
117 | io_add_stat(stats->io_wait_time, | |
118 | rq->io_start_time_ns - rq->start_time_ns, | |
119 | rq->cmd_flags); | |
120 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | |
121 | } | |
122 | EXPORT_SYMBOL_GPL(blkiocg_update_request_completion_stats); | |
123 | ||
31e4c28d | 124 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
22084190 | 125 | struct blkio_group *blkg, void *key, dev_t dev) |
31e4c28d VG |
126 | { |
127 | unsigned long flags; | |
128 | ||
129 | spin_lock_irqsave(&blkcg->lock, flags); | |
130 | rcu_assign_pointer(blkg->key, key); | |
b1c35769 | 131 | blkg->blkcg_id = css_id(&blkcg->css); |
31e4c28d VG |
132 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
133 | spin_unlock_irqrestore(&blkcg->lock, flags); | |
2868ef7b VG |
134 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
135 | /* Need to take css reference ? */ | |
136 | cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); | |
137 | #endif | |
22084190 | 138 | blkg->dev = dev; |
31e4c28d | 139 | } |
9d6a986c | 140 | EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); |
31e4c28d | 141 | |
b1c35769 VG |
142 | static void __blkiocg_del_blkio_group(struct blkio_group *blkg) |
143 | { | |
144 | hlist_del_init_rcu(&blkg->blkcg_node); | |
145 | blkg->blkcg_id = 0; | |
146 | } | |
147 | ||
148 | /* | |
149 | * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1 | |
150 | * indicating that blk_group was unhashed by the time we got to it. | |
151 | */ | |
31e4c28d VG |
152 | int blkiocg_del_blkio_group(struct blkio_group *blkg) |
153 | { | |
b1c35769 VG |
154 | struct blkio_cgroup *blkcg; |
155 | unsigned long flags; | |
156 | struct cgroup_subsys_state *css; | |
157 | int ret = 1; | |
158 | ||
159 | rcu_read_lock(); | |
160 | css = css_lookup(&blkio_subsys, blkg->blkcg_id); | |
161 | if (!css) | |
162 | goto out; | |
163 | ||
164 | blkcg = container_of(css, struct blkio_cgroup, css); | |
165 | spin_lock_irqsave(&blkcg->lock, flags); | |
166 | if (!hlist_unhashed(&blkg->blkcg_node)) { | |
167 | __blkiocg_del_blkio_group(blkg); | |
168 | ret = 0; | |
169 | } | |
170 | spin_unlock_irqrestore(&blkcg->lock, flags); | |
171 | out: | |
172 | rcu_read_unlock(); | |
173 | return ret; | |
31e4c28d | 174 | } |
9d6a986c | 175 | EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); |
31e4c28d VG |
176 | |
177 | /* called under rcu_read_lock(). */ | |
178 | struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) | |
179 | { | |
180 | struct blkio_group *blkg; | |
181 | struct hlist_node *n; | |
182 | void *__key; | |
183 | ||
184 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { | |
185 | __key = blkg->key; | |
186 | if (__key == key) | |
187 | return blkg; | |
188 | } | |
189 | ||
190 | return NULL; | |
191 | } | |
9d6a986c | 192 | EXPORT_SYMBOL_GPL(blkiocg_lookup_group); |
31e4c28d VG |
193 | |
194 | #define SHOW_FUNCTION(__VAR) \ | |
195 | static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |
196 | struct cftype *cftype) \ | |
197 | { \ | |
198 | struct blkio_cgroup *blkcg; \ | |
199 | \ | |
200 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | |
201 | return (u64)blkcg->__VAR; \ | |
202 | } | |
203 | ||
204 | SHOW_FUNCTION(weight); | |
205 | #undef SHOW_FUNCTION | |
206 | ||
207 | static int | |
208 | blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |
209 | { | |
210 | struct blkio_cgroup *blkcg; | |
f8d461d6 VG |
211 | struct blkio_group *blkg; |
212 | struct hlist_node *n; | |
3e252066 | 213 | struct blkio_policy_type *blkiop; |
31e4c28d VG |
214 | |
215 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) | |
216 | return -EINVAL; | |
217 | ||
218 | blkcg = cgroup_to_blkio_cgroup(cgroup); | |
bcf4dd43 | 219 | spin_lock(&blkio_list_lock); |
f8d461d6 | 220 | spin_lock_irq(&blkcg->lock); |
31e4c28d | 221 | blkcg->weight = (unsigned int)val; |
3e252066 | 222 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
3e252066 VG |
223 | list_for_each_entry(blkiop, &blkio_list, list) |
224 | blkiop->ops.blkio_update_group_weight_fn(blkg, | |
225 | blkcg->weight); | |
3e252066 | 226 | } |
f8d461d6 | 227 | spin_unlock_irq(&blkcg->lock); |
bcf4dd43 | 228 | spin_unlock(&blkio_list_lock); |
31e4c28d VG |
229 | return 0; |
230 | } | |
231 | ||
303a3acb DS |
232 | static int |
233 | blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |
234 | { | |
235 | struct blkio_cgroup *blkcg; | |
236 | struct blkio_group *blkg; | |
237 | struct hlist_node *n; | |
238 | struct blkio_group_stats *stats; | |
239 | ||
240 | blkcg = cgroup_to_blkio_cgroup(cgroup); | |
241 | spin_lock_irq(&blkcg->lock); | |
242 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | |
243 | spin_lock(&blkg->stats_lock); | |
244 | stats = &blkg->stats; | |
245 | memset(stats, 0, sizeof(struct blkio_group_stats)); | |
246 | spin_unlock(&blkg->stats_lock); | |
247 | } | |
248 | spin_unlock_irq(&blkcg->lock); | |
249 | return 0; | |
250 | } | |
251 | ||
252 | void get_key_name(int type, char *disk_id, char *str, int chars_left) | |
253 | { | |
254 | strlcpy(str, disk_id, chars_left); | |
255 | chars_left -= strlen(str); | |
256 | if (chars_left <= 0) { | |
257 | printk(KERN_WARNING | |
258 | "Possibly incorrect cgroup stat display format"); | |
259 | return; | |
260 | } | |
261 | switch (type) { | |
262 | case IO_READ: | |
263 | strlcat(str, " Read", chars_left); | |
264 | break; | |
265 | case IO_WRITE: | |
266 | strlcat(str, " Write", chars_left); | |
267 | break; | |
268 | case IO_SYNC: | |
269 | strlcat(str, " Sync", chars_left); | |
270 | break; | |
271 | case IO_ASYNC: | |
272 | strlcat(str, " Async", chars_left); | |
273 | break; | |
274 | case IO_TYPE_MAX: | |
275 | strlcat(str, " Total", chars_left); | |
276 | break; | |
277 | default: | |
278 | strlcat(str, " Invalid", chars_left); | |
279 | } | |
280 | } | |
281 | ||
282 | typedef uint64_t (get_var) (struct blkio_group *, int); | |
283 | ||
284 | #define MAX_KEY_LEN 100 | |
285 | uint64_t get_typed_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb, | |
286 | get_var *getvar, char *disk_id) | |
287 | { | |
288 | uint64_t disk_total; | |
289 | char key_str[MAX_KEY_LEN]; | |
290 | int type; | |
291 | ||
292 | for (type = 0; type < IO_TYPE_MAX; type++) { | |
293 | get_key_name(type, disk_id, key_str, MAX_KEY_LEN); | |
294 | cb->fill(cb, key_str, getvar(blkg, type)); | |
295 | } | |
296 | disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE); | |
297 | get_key_name(IO_TYPE_MAX, disk_id, key_str, MAX_KEY_LEN); | |
298 | cb->fill(cb, key_str, disk_total); | |
299 | return disk_total; | |
300 | } | |
301 | ||
302 | uint64_t get_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb, | |
303 | get_var *getvar, char *disk_id) | |
304 | { | |
305 | uint64_t var = getvar(blkg, 0); | |
306 | cb->fill(cb, disk_id, var); | |
307 | return var; | |
308 | } | |
309 | ||
310 | #define GET_STAT_INDEXED(__VAR) \ | |
311 | uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int type) \ | |
312 | { \ | |
313 | return blkg->stats.__VAR[type]; \ | |
314 | } \ | |
315 | ||
316 | GET_STAT_INDEXED(io_service_bytes); | |
317 | GET_STAT_INDEXED(io_serviced); | |
318 | GET_STAT_INDEXED(io_service_time); | |
319 | GET_STAT_INDEXED(io_wait_time); | |
320 | #undef GET_STAT_INDEXED | |
321 | ||
322 | #define GET_STAT(__VAR, __CONV) \ | |
323 | uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \ | |
324 | { \ | |
325 | uint64_t data = blkg->stats.__VAR; \ | |
326 | if (__CONV) \ | |
327 | data = (uint64_t)jiffies_to_msecs(data) * NSEC_PER_MSEC;\ | |
328 | return data; \ | |
329 | } | |
330 | ||
331 | GET_STAT(time, 1); | |
332 | GET_STAT(sectors, 0); | |
333 | #ifdef CONFIG_DEBUG_BLK_CGROUP | |
334 | GET_STAT(dequeue, 0); | |
335 | #endif | |
336 | #undef GET_STAT | |
337 | ||
338 | #define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) \ | |
22084190 | 339 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ |
303a3acb | 340 | struct cftype *cftype, struct cgroup_map_cb *cb) \ |
22084190 VG |
341 | { \ |
342 | struct blkio_cgroup *blkcg; \ | |
343 | struct blkio_group *blkg; \ | |
344 | struct hlist_node *n; \ | |
303a3acb DS |
345 | uint64_t cgroup_total = 0; \ |
346 | char disk_id[10]; \ | |
22084190 VG |
347 | \ |
348 | if (!cgroup_lock_live_group(cgroup)) \ | |
349 | return -ENODEV; \ | |
350 | \ | |
351 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | |
352 | rcu_read_lock(); \ | |
353 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | |
303a3acb DS |
354 | if (blkg->dev) { \ |
355 | spin_lock_irq(&blkg->stats_lock); \ | |
356 | snprintf(disk_id, 10, "%u:%u", MAJOR(blkg->dev),\ | |
357 | MINOR(blkg->dev)); \ | |
358 | cgroup_total += get_stats(blkg, cb, getvar, \ | |
359 | disk_id); \ | |
360 | spin_unlock_irq(&blkg->stats_lock); \ | |
361 | } \ | |
22084190 | 362 | } \ |
303a3acb DS |
363 | if (show_total) \ |
364 | cb->fill(cb, "Total", cgroup_total); \ | |
22084190 VG |
365 | rcu_read_unlock(); \ |
366 | cgroup_unlock(); \ | |
367 | return 0; \ | |
368 | } | |
369 | ||
303a3acb DS |
370 | SHOW_FUNCTION_PER_GROUP(time, get_stat, get_time_stat, 0); |
371 | SHOW_FUNCTION_PER_GROUP(sectors, get_stat, get_sectors_stat, 0); | |
372 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, get_typed_stat, | |
373 | get_io_service_bytes_stat, 1); | |
374 | SHOW_FUNCTION_PER_GROUP(io_serviced, get_typed_stat, get_io_serviced_stat, 1); | |
375 | SHOW_FUNCTION_PER_GROUP(io_service_time, get_typed_stat, | |
376 | get_io_service_time_stat, 1); | |
377 | SHOW_FUNCTION_PER_GROUP(io_wait_time, get_typed_stat, get_io_wait_time_stat, 1); | |
22084190 | 378 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
303a3acb | 379 | SHOW_FUNCTION_PER_GROUP(dequeue, get_stat, get_dequeue_stat, 0); |
22084190 VG |
380 | #endif |
381 | #undef SHOW_FUNCTION_PER_GROUP | |
382 | ||
383 | #ifdef CONFIG_DEBUG_BLK_CGROUP | |
9195291e | 384 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, |
22084190 VG |
385 | unsigned long dequeue) |
386 | { | |
303a3acb | 387 | blkg->stats.dequeue += dequeue; |
22084190 | 388 | } |
9195291e | 389 | EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); |
22084190 VG |
390 | #endif |
391 | ||
31e4c28d VG |
392 | struct cftype blkio_files[] = { |
393 | { | |
394 | .name = "weight", | |
395 | .read_u64 = blkiocg_weight_read, | |
396 | .write_u64 = blkiocg_weight_write, | |
397 | }, | |
22084190 VG |
398 | { |
399 | .name = "time", | |
303a3acb DS |
400 | .read_map = blkiocg_time_read, |
401 | .write_u64 = blkiocg_reset_write, | |
22084190 VG |
402 | }, |
403 | { | |
404 | .name = "sectors", | |
303a3acb DS |
405 | .read_map = blkiocg_sectors_read, |
406 | .write_u64 = blkiocg_reset_write, | |
407 | }, | |
408 | { | |
409 | .name = "io_service_bytes", | |
410 | .read_map = blkiocg_io_service_bytes_read, | |
411 | .write_u64 = blkiocg_reset_write, | |
412 | }, | |
413 | { | |
414 | .name = "io_serviced", | |
415 | .read_map = blkiocg_io_serviced_read, | |
416 | .write_u64 = blkiocg_reset_write, | |
417 | }, | |
418 | { | |
419 | .name = "io_service_time", | |
420 | .read_map = blkiocg_io_service_time_read, | |
421 | .write_u64 = blkiocg_reset_write, | |
422 | }, | |
423 | { | |
424 | .name = "io_wait_time", | |
425 | .read_map = blkiocg_io_wait_time_read, | |
426 | .write_u64 = blkiocg_reset_write, | |
22084190 VG |
427 | }, |
428 | #ifdef CONFIG_DEBUG_BLK_CGROUP | |
429 | { | |
430 | .name = "dequeue", | |
303a3acb | 431 | .read_map = blkiocg_dequeue_read, |
22084190 VG |
432 | }, |
433 | #endif | |
31e4c28d VG |
434 | }; |
435 | ||
436 | static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |
437 | { | |
438 | return cgroup_add_files(cgroup, subsys, blkio_files, | |
439 | ARRAY_SIZE(blkio_files)); | |
440 | } | |
441 | ||
442 | static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |
443 | { | |
444 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); | |
b1c35769 VG |
445 | unsigned long flags; |
446 | struct blkio_group *blkg; | |
447 | void *key; | |
3e252066 | 448 | struct blkio_policy_type *blkiop; |
b1c35769 VG |
449 | |
450 | rcu_read_lock(); | |
451 | remove_entry: | |
452 | spin_lock_irqsave(&blkcg->lock, flags); | |
453 | ||
454 | if (hlist_empty(&blkcg->blkg_list)) { | |
455 | spin_unlock_irqrestore(&blkcg->lock, flags); | |
456 | goto done; | |
457 | } | |
458 | ||
459 | blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, | |
460 | blkcg_node); | |
461 | key = rcu_dereference(blkg->key); | |
462 | __blkiocg_del_blkio_group(blkg); | |
31e4c28d | 463 | |
b1c35769 VG |
464 | spin_unlock_irqrestore(&blkcg->lock, flags); |
465 | ||
466 | /* | |
467 | * This blkio_group is being unlinked as associated cgroup is going | |
468 | * away. Let all the IO controlling policies know about this event. | |
469 | * | |
470 | * Currently this is static call to one io controlling policy. Once | |
471 | * we have more policies in place, we need some dynamic registration | |
472 | * of callback function. | |
473 | */ | |
3e252066 VG |
474 | spin_lock(&blkio_list_lock); |
475 | list_for_each_entry(blkiop, &blkio_list, list) | |
476 | blkiop->ops.blkio_unlink_group_fn(key, blkg); | |
477 | spin_unlock(&blkio_list_lock); | |
b1c35769 VG |
478 | goto remove_entry; |
479 | done: | |
31e4c28d | 480 | free_css_id(&blkio_subsys, &blkcg->css); |
b1c35769 | 481 | rcu_read_unlock(); |
67523c48 BB |
482 | if (blkcg != &blkio_root_cgroup) |
483 | kfree(blkcg); | |
31e4c28d VG |
484 | } |
485 | ||
486 | static struct cgroup_subsys_state * | |
487 | blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |
488 | { | |
489 | struct blkio_cgroup *blkcg, *parent_blkcg; | |
490 | ||
491 | if (!cgroup->parent) { | |
492 | blkcg = &blkio_root_cgroup; | |
493 | goto done; | |
494 | } | |
495 | ||
496 | /* Currently we do not support hierarchy deeper than two level (0,1) */ | |
497 | parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent); | |
498 | if (css_depth(&parent_blkcg->css) > 0) | |
499 | return ERR_PTR(-EINVAL); | |
500 | ||
501 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); | |
502 | if (!blkcg) | |
503 | return ERR_PTR(-ENOMEM); | |
504 | ||
505 | blkcg->weight = BLKIO_WEIGHT_DEFAULT; | |
506 | done: | |
507 | spin_lock_init(&blkcg->lock); | |
508 | INIT_HLIST_HEAD(&blkcg->blkg_list); | |
509 | ||
510 | return &blkcg->css; | |
511 | } | |
512 | ||
513 | /* | |
514 | * We cannot support shared io contexts, as we have no mean to support | |
515 | * two tasks with the same ioc in two different groups without major rework | |
516 | * of the main cic data structures. For now we allow a task to change | |
517 | * its cgroup only if it's the only owner of its ioc. | |
518 | */ | |
519 | static int blkiocg_can_attach(struct cgroup_subsys *subsys, | |
520 | struct cgroup *cgroup, struct task_struct *tsk, | |
521 | bool threadgroup) | |
522 | { | |
523 | struct io_context *ioc; | |
524 | int ret = 0; | |
525 | ||
526 | /* task_lock() is needed to avoid races with exit_io_context() */ | |
527 | task_lock(tsk); | |
528 | ioc = tsk->io_context; | |
529 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) | |
530 | ret = -EINVAL; | |
531 | task_unlock(tsk); | |
532 | ||
533 | return ret; | |
534 | } | |
535 | ||
536 | static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, | |
537 | struct cgroup *prev, struct task_struct *tsk, | |
538 | bool threadgroup) | |
539 | { | |
540 | struct io_context *ioc; | |
541 | ||
542 | task_lock(tsk); | |
543 | ioc = tsk->io_context; | |
544 | if (ioc) | |
545 | ioc->cgroup_changed = 1; | |
546 | task_unlock(tsk); | |
547 | } | |
548 | ||
3e252066 VG |
549 | void blkio_policy_register(struct blkio_policy_type *blkiop) |
550 | { | |
551 | spin_lock(&blkio_list_lock); | |
552 | list_add_tail(&blkiop->list, &blkio_list); | |
553 | spin_unlock(&blkio_list_lock); | |
554 | } | |
555 | EXPORT_SYMBOL_GPL(blkio_policy_register); | |
556 | ||
557 | void blkio_policy_unregister(struct blkio_policy_type *blkiop) | |
558 | { | |
559 | spin_lock(&blkio_list_lock); | |
560 | list_del_init(&blkiop->list); | |
561 | spin_unlock(&blkio_list_lock); | |
562 | } | |
563 | EXPORT_SYMBOL_GPL(blkio_policy_unregister); | |
67523c48 BB |
564 | |
565 | static int __init init_cgroup_blkio(void) | |
566 | { | |
567 | return cgroup_load_subsys(&blkio_subsys); | |
568 | } | |
569 | ||
570 | static void __exit exit_cgroup_blkio(void) | |
571 | { | |
572 | cgroup_unload_subsys(&blkio_subsys); | |
573 | } | |
574 | ||
575 | module_init(init_cgroup_blkio); | |
576 | module_exit(exit_cgroup_blkio); | |
577 | MODULE_LICENSE("GPL"); |