blkcg: don't use percpu for merged stats
block/blk-cgroup.c
1 /*
2 * Common Block IO controller cgroup interface
3 *
4 * Based on ideas and code from CFQ, CFS and BFQ:
5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6 *
7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8 * Paolo Valente <paolo.valente@unimore.it>
9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com>
12 */
13 #include <linux/ioprio.h>
14 #include <linux/seq_file.h>
15 #include <linux/kdev_t.h>
16 #include <linux/module.h>
17 #include <linux/err.h>
18 #include <linux/blkdev.h>
19 #include <linux/slab.h>
20 #include <linux/genhd.h>
21 #include <linux/delay.h>
22 #include "blk-cgroup.h"
23 #include "blk.h"
24
25 #define MAX_KEY_LEN 100
26
27 static DEFINE_SPINLOCK(blkio_list_lock);
28 static LIST_HEAD(blkio_list);
29
30 static DEFINE_MUTEX(all_q_mutex);
31 static LIST_HEAD(all_q_list);
32
33 /* List of groups pending per cpu stats allocation */
34 static DEFINE_SPINLOCK(alloc_list_lock);
35 static LIST_HEAD(alloc_list);
36
37 static void blkio_stat_alloc_fn(struct work_struct *);
38 static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
39
40 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
41 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
42
43 static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];
44
45 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
46 struct cgroup *);
47 static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
48 struct cgroup_taskset *);
49 static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
50 struct cgroup_taskset *);
51 static int blkiocg_pre_destroy(struct cgroup_subsys *, struct cgroup *);
52 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
53 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
54
55 /* for encoding cft->private value on file */
56 #define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val))
57 /* What policy owns the file, proportional or throttle */
58 #define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff)
59 #define BLKIOFILE_ATTR(val) ((val) & 0xffff)
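/*
 * Example: BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_read_bps_device)
 * packs the policy id into the upper 16 bits and the attribute into the
 * lower 16 bits of cft->private; BLKIOFILE_POLICY() and BLKIOFILE_ATTR()
 * recover them in the cgroup file handlers below.
 */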
60
61 struct cgroup_subsys blkio_subsys = {
62 .name = "blkio",
63 .create = blkiocg_create,
64 .can_attach = blkiocg_can_attach,
65 .attach = blkiocg_attach,
66 .pre_destroy = blkiocg_pre_destroy,
67 .destroy = blkiocg_destroy,
68 .populate = blkiocg_populate,
69 .subsys_id = blkio_subsys_id,
70 .module = THIS_MODULE,
71 };
72 EXPORT_SYMBOL_GPL(blkio_subsys);
73
74 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
75 {
76 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
77 struct blkio_cgroup, css);
78 }
79 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
80
81 static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
82 {
83 return container_of(task_subsys_state(tsk, blkio_subsys_id),
84 struct blkio_cgroup, css);
85 }
86
87 struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
88 {
89 if (bio && bio->bi_css)
90 return container_of(bio->bi_css, struct blkio_cgroup, css);
91 return task_blkio_cgroup(current);
92 }
93 EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
94
95 static inline void blkio_update_group_weight(struct blkio_group *blkg,
96 int plid, unsigned int weight)
97 {
98 struct blkio_policy_type *blkiop;
99
100 list_for_each_entry(blkiop, &blkio_list, list) {
101 /* If this policy does not own the blkg, do not send updates */
102 if (blkiop->plid != plid)
103 continue;
104 if (blkiop->ops.blkio_update_group_weight_fn)
105 blkiop->ops.blkio_update_group_weight_fn(blkg->q,
106 blkg, weight);
107 }
108 }
109
110 static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid,
111 u64 bps, int fileid)
112 {
113 struct blkio_policy_type *blkiop;
114
115 list_for_each_entry(blkiop, &blkio_list, list) {
116
117 /* If this policy does not own the blkg, do not send updates */
118 if (blkiop->plid != plid)
119 continue;
120
121 if (fileid == BLKIO_THROTL_read_bps_device
122 && blkiop->ops.blkio_update_group_read_bps_fn)
123 blkiop->ops.blkio_update_group_read_bps_fn(blkg->q,
124 blkg, bps);
125
126 if (fileid == BLKIO_THROTL_write_bps_device
127 && blkiop->ops.blkio_update_group_write_bps_fn)
128 blkiop->ops.blkio_update_group_write_bps_fn(blkg->q,
129 blkg, bps);
130 }
131 }
132
133 static inline void blkio_update_group_iops(struct blkio_group *blkg,
134 int plid, unsigned int iops,
135 int fileid)
136 {
137 struct blkio_policy_type *blkiop;
138
139 list_for_each_entry(blkiop, &blkio_list, list) {
140
141 /* If this policy does not own the blkg, do not send updates */
142 if (blkiop->plid != plid)
143 continue;
144
145 if (fileid == BLKIO_THROTL_read_iops_device
146 && blkiop->ops.blkio_update_group_read_iops_fn)
147 blkiop->ops.blkio_update_group_read_iops_fn(blkg->q,
148 blkg, iops);
149
150 if (fileid == BLKIO_THROTL_write_iops_device
151 && blkiop->ops.blkio_update_group_write_iops_fn)
152 blkiop->ops.blkio_update_group_write_iops_fn(blkg->q,
153 blkg, iops);
154 }
155 }
156
157 /*
158 * Add to the appropriate stat variable depending on the request type.
159 * This should be called with the blkg->stats_lock held.
160 */
161 static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
162 bool sync)
163 {
164 if (direction)
165 stat[BLKIO_STAT_WRITE] += add;
166 else
167 stat[BLKIO_STAT_READ] += add;
168 if (sync)
169 stat[BLKIO_STAT_SYNC] += add;
170 else
171 stat[BLKIO_STAT_ASYNC] += add;
172 }
173
174 /*
175 * Decrement the appropriate stat variable depending on the request type.
176 * Panics if the value is already zero.
177 * This should be called with the blkg->stats_lock held.
178 */
179 static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
180 {
181 if (direction) {
182 BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
183 stat[BLKIO_STAT_WRITE]--;
184 } else {
185 BUG_ON(stat[BLKIO_STAT_READ] == 0);
186 stat[BLKIO_STAT_READ]--;
187 }
188 if (sync) {
189 BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
190 stat[BLKIO_STAT_SYNC]--;
191 } else {
192 BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
193 stat[BLKIO_STAT_ASYNC]--;
194 }
195 }
196
197 #ifdef CONFIG_DEBUG_BLK_CGROUP
198 /* This should be called with the blkg->stats_lock held. */
199 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
200 struct blkio_policy_type *pol,
201 struct blkio_group *curr_blkg)
202 {
203 struct blkg_policy_data *pd = blkg->pd[pol->plid];
204
205 if (blkio_blkg_waiting(&pd->stats))
206 return;
207 if (blkg == curr_blkg)
208 return;
209 pd->stats.start_group_wait_time = sched_clock();
210 blkio_mark_blkg_waiting(&pd->stats);
211 }
212
213 /* This should be called with the blkg->stats_lock held. */
214 static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
215 {
216 unsigned long long now;
217
218 if (!blkio_blkg_waiting(stats))
219 return;
220
221 now = sched_clock();
222 if (time_after64(now, stats->start_group_wait_time))
223 stats->group_wait_time += now - stats->start_group_wait_time;
224 blkio_clear_blkg_waiting(stats);
225 }
226
227 /* This should be called with the blkg->stats_lock held. */
228 static void blkio_end_empty_time(struct blkio_group_stats *stats)
229 {
230 unsigned long long now;
231
232 if (!blkio_blkg_empty(stats))
233 return;
234
235 now = sched_clock();
236 if (time_after64(now, stats->start_empty_time))
237 stats->empty_time += now - stats->start_empty_time;
238 blkio_clear_blkg_empty(stats);
239 }
240
241 void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
242 struct blkio_policy_type *pol)
243 {
244 struct blkg_policy_data *pd = blkg->pd[pol->plid];
245 unsigned long flags;
246
247 spin_lock_irqsave(&blkg->stats_lock, flags);
248 BUG_ON(blkio_blkg_idling(&pd->stats));
249 pd->stats.start_idle_time = sched_clock();
250 blkio_mark_blkg_idling(&pd->stats);
251 spin_unlock_irqrestore(&blkg->stats_lock, flags);
252 }
253 EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
254
255 void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
256 struct blkio_policy_type *pol)
257 {
258 struct blkg_policy_data *pd = blkg->pd[pol->plid];
259 unsigned long flags;
260 unsigned long long now;
261 struct blkio_group_stats *stats;
262
263 spin_lock_irqsave(&blkg->stats_lock, flags);
264 stats = &pd->stats;
265 if (blkio_blkg_idling(stats)) {
266 now = sched_clock();
267 if (time_after64(now, stats->start_idle_time))
268 stats->idle_time += now - stats->start_idle_time;
269 blkio_clear_blkg_idling(stats);
270 }
271 spin_unlock_irqrestore(&blkg->stats_lock, flags);
272 }
273 EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
274
275 void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
276 struct blkio_policy_type *pol)
277 {
278 struct blkg_policy_data *pd = blkg->pd[pol->plid];
279 unsigned long flags;
280 struct blkio_group_stats *stats;
281
282 spin_lock_irqsave(&blkg->stats_lock, flags);
283 stats = &pd->stats;
284 stats->avg_queue_size_sum +=
285 stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
286 stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
287 stats->avg_queue_size_samples++;
288 blkio_update_group_wait_time(stats);
289 spin_unlock_irqrestore(&blkg->stats_lock, flags);
290 }
291 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
292
293 void blkiocg_set_start_empty_time(struct blkio_group *blkg,
294 struct blkio_policy_type *pol)
295 {
296 struct blkg_policy_data *pd = blkg->pd[pol->plid];
297 unsigned long flags;
298 struct blkio_group_stats *stats;
299
300 spin_lock_irqsave(&blkg->stats_lock, flags);
301 stats = &pd->stats;
302
303 if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
304 stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
305 spin_unlock_irqrestore(&blkg->stats_lock, flags);
306 return;
307 }
308
309 /*
310 * The group is already marked empty. This can happen if the cfqq got a
311 * new request in the parent group and moved to this group while being
312 * added to the service tree. Just ignore the event and move on.
313 */
314 if (blkio_blkg_empty(stats)) {
315 spin_unlock_irqrestore(&blkg->stats_lock, flags);
316 return;
317 }
318
319 stats->start_empty_time = sched_clock();
320 blkio_mark_blkg_empty(stats);
321 spin_unlock_irqrestore(&blkg->stats_lock, flags);
322 }
323 EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
324
325 void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
326 struct blkio_policy_type *pol,
327 unsigned long dequeue)
328 {
329 struct blkg_policy_data *pd = blkg->pd[pol->plid];
330
331 pd->stats.dequeue += dequeue;
332 }
333 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
334 #else
335 static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
336 struct blkio_policy_type *pol,
337 struct blkio_group *curr_blkg) { }
338 static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { }
339 #endif
340
341 void blkiocg_update_io_add_stats(struct blkio_group *blkg,
342 struct blkio_policy_type *pol,
343 struct blkio_group *curr_blkg, bool direction,
344 bool sync)
345 {
346 struct blkg_policy_data *pd = blkg->pd[pol->plid];
347 unsigned long flags;
348
349 spin_lock_irqsave(&blkg->stats_lock, flags);
350 blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
351 sync);
352 blkio_end_empty_time(&pd->stats);
353 blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
354 spin_unlock_irqrestore(&blkg->stats_lock, flags);
355 }
356 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
357
358 void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
359 struct blkio_policy_type *pol,
360 bool direction, bool sync)
361 {
362 struct blkg_policy_data *pd = blkg->pd[pol->plid];
363 unsigned long flags;
364
365 spin_lock_irqsave(&blkg->stats_lock, flags);
366 blkio_check_and_dec_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED],
367 direction, sync);
368 spin_unlock_irqrestore(&blkg->stats_lock, flags);
369 }
370 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
371
372 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
373 struct blkio_policy_type *pol,
374 unsigned long time,
375 unsigned long unaccounted_time)
376 {
377 struct blkg_policy_data *pd = blkg->pd[pol->plid];
378 unsigned long flags;
379
380 spin_lock_irqsave(&blkg->stats_lock, flags);
381 pd->stats.time += time;
382 #ifdef CONFIG_DEBUG_BLK_CGROUP
383 pd->stats.unaccounted_time += unaccounted_time;
384 #endif
385 spin_unlock_irqrestore(&blkg->stats_lock, flags);
386 }
387 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
388
389 /*
390 * should be called under rcu read lock or queue lock to make sure blkg pointer
391 * is valid.
392 */
393 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
394 struct blkio_policy_type *pol,
395 uint64_t bytes, bool direction, bool sync)
396 {
397 struct blkg_policy_data *pd = blkg->pd[pol->plid];
398 struct blkio_group_stats_cpu *stats_cpu;
399 unsigned long flags;
400
401 /* If per cpu stats are not allocated yet, don't do any accounting. */
402 if (pd->stats_cpu == NULL)
403 return;
404
405 /*
406 * Disable interrupts to provide mutual exclusion between two
407 * writes on the same CPU. It is probably not needed on 64-bit, but
408 * that case is not optimized yet.
409 */
410 local_irq_save(flags);
411
412 stats_cpu = this_cpu_ptr(pd->stats_cpu);
413
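/*
 * The u64_stats write section below pairs with the
 * u64_stats_fetch_begin()/u64_stats_fetch_retry() loop in
 * blkio_read_stat_cpu() so that 32-bit readers see consistent
 * 64-bit counter values.
 */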
414 u64_stats_update_begin(&stats_cpu->syncp);
415 stats_cpu->sectors += bytes >> 9;
416 blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
417 1, direction, sync);
418 blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
419 bytes, direction, sync);
420 u64_stats_update_end(&stats_cpu->syncp);
421 local_irq_restore(flags);
422 }
423 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
424
425 void blkiocg_update_completion_stats(struct blkio_group *blkg,
426 struct blkio_policy_type *pol,
427 uint64_t start_time,
428 uint64_t io_start_time, bool direction,
429 bool sync)
430 {
431 struct blkg_policy_data *pd = blkg->pd[pol->plid];
432 struct blkio_group_stats *stats;
433 unsigned long flags;
434 unsigned long long now = sched_clock();
435
436 spin_lock_irqsave(&blkg->stats_lock, flags);
437 stats = &pd->stats;
438 if (time_after64(now, io_start_time))
439 blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
440 now - io_start_time, direction, sync);
441 if (time_after64(io_start_time, start_time))
442 blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
443 io_start_time - start_time, direction, sync);
444 spin_unlock_irqrestore(&blkg->stats_lock, flags);
445 }
446 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
447
448 /* Merged stats are not per cpu; they are updated under blkg->stats_lock. */
449 void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
450 struct blkio_policy_type *pol,
451 bool direction, bool sync)
452 {
453 struct blkg_policy_data *pd = blkg->pd[pol->plid];
454 struct blkio_group_stats *stats;
455 unsigned long flags;
456
457 spin_lock_irqsave(&blkg->stats_lock, flags);
458 stats = &pd->stats;
459 blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync);
460 spin_unlock_irqrestore(&blkg->stats_lock, flags);
461 }
462 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
463
464 /*
465 * Worker for allocating per cpu stat for blk groups. This is scheduled on
466 * the system_nrt_wq once there are some groups on the alloc_list waiting
467 * for allocation.
468 */
469 static void blkio_stat_alloc_fn(struct work_struct *work)
470 {
471 static void *pcpu_stats[BLKIO_NR_POLICIES];
472 struct delayed_work *dwork = to_delayed_work(work);
473 struct blkio_group *blkg;
474 int i;
475 bool empty = false;
476
477 alloc_stats:
478 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
479 if (pcpu_stats[i] != NULL)
480 continue;
481
482 pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);
483
484 /* Allocation failed. Try again after some time. */
485 if (pcpu_stats[i] == NULL) {
486 queue_delayed_work(system_nrt_wq, dwork,
487 msecs_to_jiffies(10));
488 return;
489 }
490 }
491
492 spin_lock_irq(&blkio_list_lock);
493 spin_lock(&alloc_list_lock);
494
495 /* The list may already be empty if the cgroup got deleted or the queue exited. */
496 if (!list_empty(&alloc_list)) {
497 blkg = list_first_entry(&alloc_list, struct blkio_group,
498 alloc_node);
499 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
500 struct blkg_policy_data *pd = blkg->pd[i];
501
502 if (blkio_policy[i] && pd && !pd->stats_cpu)
503 swap(pd->stats_cpu, pcpu_stats[i]);
504 }
505
506 list_del_init(&blkg->alloc_node);
507 }
508
509 empty = list_empty(&alloc_list);
510
511 spin_unlock(&alloc_list_lock);
512 spin_unlock_irq(&blkio_list_lock);
513
514 if (!empty)
515 goto alloc_stats;
516 }
517
518 /**
519 * blkg_free - free a blkg
520 * @blkg: blkg to free
521 *
522 * Free @blkg which may be partially allocated.
523 */
524 static void blkg_free(struct blkio_group *blkg)
525 {
526 int i;
527
528 if (!blkg)
529 return;
530
531 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
532 struct blkg_policy_data *pd = blkg->pd[i];
533
534 if (pd) {
535 free_percpu(pd->stats_cpu);
536 kfree(pd);
537 }
538 }
539
540 kfree(blkg);
541 }
542
543 /**
544 * blkg_alloc - allocate a blkg
545 * @blkcg: block cgroup the new blkg is associated with
546 * @q: request_queue the new blkg is associated with
547 *
548 * Allocate a new blkg associating @blkcg and @q.
549 */
550 static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
551 struct request_queue *q)
552 {
553 struct blkio_group *blkg;
554 int i;
555
556 /* alloc and init base part */
557 blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
558 if (!blkg)
559 return NULL;
560
561 spin_lock_init(&blkg->stats_lock);
562 blkg->q = q;
563 INIT_LIST_HEAD(&blkg->q_node);
564 INIT_LIST_HEAD(&blkg->alloc_node);
565 blkg->blkcg = blkcg;
566 blkg->refcnt = 1;
567 cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
568
569 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
570 struct blkio_policy_type *pol = blkio_policy[i];
571 struct blkg_policy_data *pd;
572
573 if (!pol)
574 continue;
575
576 /* alloc per-policy data and attach it to blkg */
577 pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
578 q->node);
579 if (!pd) {
580 blkg_free(blkg);
581 return NULL;
582 }
583
584 blkg->pd[i] = pd;
585 pd->blkg = blkg;
586 }
587
588 /* invoke per-policy init */
589 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
590 struct blkio_policy_type *pol = blkio_policy[i];
591
592 if (pol)
593 pol->ops.blkio_init_group_fn(blkg);
594 }
595
596 return blkg;
597 }
598
599 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
600 struct request_queue *q,
601 enum blkio_policy_id plid,
602 bool for_root)
603 __releases(q->queue_lock) __acquires(q->queue_lock)
604 {
605 struct blkio_group *blkg;
606
607 WARN_ON_ONCE(!rcu_read_lock_held());
608 lockdep_assert_held(q->queue_lock);
609
610 /*
611 * This could be the first entry point of blkcg implementation and
612 * we shouldn't allow anything to go through for a bypassing queue.
613 * The following can be removed if blkg lookup is guaranteed to
614 * fail on a bypassing queue.
615 */
616 if (unlikely(blk_queue_bypass(q)) && !for_root)
617 return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
618
619 blkg = blkg_lookup(blkcg, q);
620 if (blkg)
621 return blkg;
622
623 /* blkg holds a reference to blkcg */
624 if (!css_tryget(&blkcg->css))
625 return ERR_PTR(-EINVAL);
626
627 /*
628 * Allocate and initialize.
629 */
630 blkg = blkg_alloc(blkcg, q);
631
632 /* did alloc fail? */
633 if (unlikely(!blkg)) {
634 blkg = ERR_PTR(-ENOMEM);
635 goto out;
636 }
637
638 /* insert */
639 spin_lock(&blkcg->lock);
640 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
641 list_add(&blkg->q_node, &q->blkg_list);
642 spin_unlock(&blkcg->lock);
643
644 spin_lock(&alloc_list_lock);
645 list_add(&blkg->alloc_node, &alloc_list);
646 /* Queue per cpu stat allocation from worker thread. */
647 queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
648 spin_unlock(&alloc_list_lock);
649 out:
650 return blkg;
651 }
652 EXPORT_SYMBOL_GPL(blkg_lookup_create);
653
654 /* called under rcu_read_lock(). */
655 struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
656 struct request_queue *q)
657 {
658 struct blkio_group *blkg;
659 struct hlist_node *n;
660
661 hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
662 if (blkg->q == q)
663 return blkg;
664 return NULL;
665 }
666 EXPORT_SYMBOL_GPL(blkg_lookup);
667
668 static void blkg_destroy(struct blkio_group *blkg)
669 {
670 struct request_queue *q = blkg->q;
671 struct blkio_cgroup *blkcg = blkg->blkcg;
672
673 lockdep_assert_held(q->queue_lock);
674 lockdep_assert_held(&blkcg->lock);
675
676 /* Something is wrong if we are trying to remove the same group twice */
677 WARN_ON_ONCE(list_empty(&blkg->q_node));
678 WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
679 list_del_init(&blkg->q_node);
680 hlist_del_init_rcu(&blkg->blkcg_node);
681
682 spin_lock(&alloc_list_lock);
683 list_del_init(&blkg->alloc_node);
684 spin_unlock(&alloc_list_lock);
685
686 /*
687 * Put the reference taken at the time of creation so that when all
688 * queues are gone, group can be destroyed.
689 */
690 blkg_put(blkg);
691 }
692
693 /*
694 * XXX: This updates blkg policy data in-place for root blkg, which is
695 * necessary across elevator switch and policy registration as root blkgs
696 * aren't shot down. This broken and racy implementation is temporary.
697 * Eventually, blkg shoot down will be replaced by proper in-place update.
698 */
699 void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
700 {
701 struct blkio_policy_type *pol = blkio_policy[plid];
702 struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
703 struct blkg_policy_data *pd;
704
705 if (!blkg)
706 return;
707
708 kfree(blkg->pd[plid]);
709 blkg->pd[plid] = NULL;
710
711 if (!pol)
712 return;
713
714 pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
715 WARN_ON_ONCE(!pd);
716
717 pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
718 WARN_ON_ONCE(!pd->stats_cpu);
719
720 blkg->pd[plid] = pd;
721 pd->blkg = blkg;
722 pol->ops.blkio_init_group_fn(blkg);
723 }
724 EXPORT_SYMBOL_GPL(update_root_blkg_pd);
725
726 /**
727 * blkg_destroy_all - destroy all blkgs associated with a request_queue
728 * @q: request_queue of interest
729 * @destroy_root: whether to destroy root blkg or not
730 *
731 * Destroy blkgs associated with @q. If @destroy_root is %true, all are
732 * destroyed; otherwise, root blkg is left alone.
733 */
734 void blkg_destroy_all(struct request_queue *q, bool destroy_root)
735 {
736 struct blkio_group *blkg, *n;
737
738 spin_lock_irq(q->queue_lock);
739
740 list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
741 struct blkio_cgroup *blkcg = blkg->blkcg;
742
743 /* skip root? */
744 if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
745 continue;
746
747 spin_lock(&blkcg->lock);
748 blkg_destroy(blkg);
749 spin_unlock(&blkcg->lock);
750 }
751
752 spin_unlock_irq(q->queue_lock);
753 }
754 EXPORT_SYMBOL_GPL(blkg_destroy_all);
755
756 static void blkg_rcu_free(struct rcu_head *rcu_head)
757 {
758 blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
759 }
760
761 void __blkg_release(struct blkio_group *blkg)
762 {
763 /* release the extra blkcg reference this blkg has been holding */
764 css_put(&blkg->blkcg->css);
765
766 /*
767 * A group is freed in an RCU manner. But having an RCU read lock does
768 * not mean that one can access all the fields of blkg and assume they
769 * are valid. For example, don't try to follow throtl_data and
770 * request queue links.
771 *
772 * Holding a reference to a blkg under RCU only allows access to
773 * values local to the group, like group stats and group rate limits.
774 */
775 call_rcu(&blkg->rcu_head, blkg_rcu_free);
776 }
777 EXPORT_SYMBOL_GPL(__blkg_release);
778
779 static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
780 {
781 struct blkg_policy_data *pd = blkg->pd[plid];
782 struct blkio_group_stats_cpu *stats_cpu;
783 int i, j, k;
784
785 if (pd->stats_cpu == NULL)
786 return;
787 /*
788 * Note: On a 64-bit arch this should not be an issue. On a 32-bit arch
789 * this may return inconsistent values, as a 64-bit update on 32-bit is
790 * not atomic. Taking care of that corner case would make the code very
791 * complicated (sending IPIs to cpus, taking care of stats of offline
792 * cpus, etc.).
793 *
794 * Resetting stats is more of a debug feature anyway and this is a
795 * corner case, so the code is not being complicated until and unless
796 * this becomes a real issue.
797 */
798 for_each_possible_cpu(i) {
799 stats_cpu = per_cpu_ptr(pd->stats_cpu, i);
800 stats_cpu->sectors = 0;
801 for (j = 0; j < BLKIO_STAT_CPU_NR; j++)
802 for (k = 0; k < BLKIO_STAT_TOTAL; k++)
803 stats_cpu->stat_arr_cpu[j][k] = 0;
804 }
805 }
806
807 static int
808 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
809 {
810 struct blkio_cgroup *blkcg;
811 struct blkio_group *blkg;
812 struct blkio_group_stats *stats;
813 struct hlist_node *n;
814 uint64_t queued[BLKIO_STAT_TOTAL];
815 int i;
816 #ifdef CONFIG_DEBUG_BLK_CGROUP
817 bool idling, waiting, empty;
818 unsigned long long now = sched_clock();
819 #endif
820
821 blkcg = cgroup_to_blkio_cgroup(cgroup);
822 spin_lock(&blkio_list_lock);
823 spin_lock_irq(&blkcg->lock);
824 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
825 struct blkio_policy_type *pol;
826
827 list_for_each_entry(pol, &blkio_list, list) {
828 struct blkg_policy_data *pd = blkg->pd[pol->plid];
829
830 spin_lock(&blkg->stats_lock);
831 stats = &pd->stats;
832 #ifdef CONFIG_DEBUG_BLK_CGROUP
833 idling = blkio_blkg_idling(stats);
834 waiting = blkio_blkg_waiting(stats);
835 empty = blkio_blkg_empty(stats);
836 #endif
837 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
838 queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
839 memset(stats, 0, sizeof(struct blkio_group_stats));
840 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
841 stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
842 #ifdef CONFIG_DEBUG_BLK_CGROUP
843 if (idling) {
844 blkio_mark_blkg_idling(stats);
845 stats->start_idle_time = now;
846 }
847 if (waiting) {
848 blkio_mark_blkg_waiting(stats);
849 stats->start_group_wait_time = now;
850 }
851 if (empty) {
852 blkio_mark_blkg_empty(stats);
853 stats->start_empty_time = now;
854 }
855 #endif
856 spin_unlock(&blkg->stats_lock);
857
858 /* Reset per cpu stats which don't take blkg->stats_lock */
859 blkio_reset_stats_cpu(blkg, pol->plid);
860 }
861 }
862
863 spin_unlock_irq(&blkcg->lock);
864 spin_unlock(&blkio_list_lock);
865 return 0;
866 }
867
868 static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
869 char *str, int chars_left, bool diskname_only)
870 {
871 snprintf(str, chars_left, "%s", dname);
872 chars_left -= strlen(str);
873 if (chars_left <= 0) {
874 printk(KERN_WARNING
875 "Possibly incorrect cgroup stat display format");
876 return;
877 }
878 if (diskname_only)
879 return;
880 switch (type) {
881 case BLKIO_STAT_READ:
882 strlcat(str, " Read", chars_left);
883 break;
884 case BLKIO_STAT_WRITE:
885 strlcat(str, " Write", chars_left);
886 break;
887 case BLKIO_STAT_SYNC:
888 strlcat(str, " Sync", chars_left);
889 break;
890 case BLKIO_STAT_ASYNC:
891 strlcat(str, " Async", chars_left);
892 break;
893 case BLKIO_STAT_TOTAL:
894 strlcat(str, " Total", chars_left);
895 break;
896 default:
897 strlcat(str, " Invalid", chars_left);
898 }
899 }
900
901 static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
902 struct cgroup_map_cb *cb, const char *dname)
903 {
904 blkio_get_key_name(0, dname, str, chars_left, true);
905 cb->fill(cb, str, val);
906 return val;
907 }
908
909
910 static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
911 enum stat_type_cpu type, enum stat_sub_type sub_type)
912 {
913 struct blkg_policy_data *pd = blkg->pd[plid];
914 int cpu;
915 struct blkio_group_stats_cpu *stats_cpu;
916 u64 val = 0, tval;
917
918 if (pd->stats_cpu == NULL)
919 return val;
920
921 for_each_possible_cpu(cpu) {
922 unsigned int start;
923 stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
924
925 do {
926 start = u64_stats_fetch_begin(&stats_cpu->syncp);
927 if (type == BLKIO_STAT_CPU_SECTORS)
928 tval = stats_cpu->sectors;
929 else
930 tval = stats_cpu->stat_arr_cpu[type][sub_type];
931 } while (u64_stats_fetch_retry(&stats_cpu->syncp, start));
932
933 val += tval;
934 }
935
936 return val;
937 }
938
939 static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
940 struct cgroup_map_cb *cb, const char *dname,
941 enum stat_type_cpu type)
942 {
943 uint64_t disk_total, val;
944 char key_str[MAX_KEY_LEN];
945 enum stat_sub_type sub_type;
946
947 if (type == BLKIO_STAT_CPU_SECTORS) {
948 val = blkio_read_stat_cpu(blkg, plid, type, 0);
949 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb,
950 dname);
951 }
952
953 for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
954 sub_type++) {
955 blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
956 false);
957 val = blkio_read_stat_cpu(blkg, plid, type, sub_type);
958 cb->fill(cb, key_str, val);
959 }
960
961 disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) +
962 blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE);
963
964 blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
965 false);
966 cb->fill(cb, key_str, disk_total);
967 return disk_total;
968 }
969
970 /* This should be called with blkg->stats_lock held */
971 static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
972 struct cgroup_map_cb *cb, const char *dname,
973 enum stat_type type)
974 {
975 struct blkg_policy_data *pd = blkg->pd[plid];
976 uint64_t disk_total;
977 char key_str[MAX_KEY_LEN];
978 enum stat_sub_type sub_type;
979
980 if (type == BLKIO_STAT_TIME)
981 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
982 pd->stats.time, cb, dname);
983 #ifdef CONFIG_DEBUG_BLK_CGROUP
984 if (type == BLKIO_STAT_UNACCOUNTED_TIME)
985 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
986 pd->stats.unaccounted_time, cb, dname);
987 if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
988 uint64_t sum = pd->stats.avg_queue_size_sum;
989 uint64_t samples = pd->stats.avg_queue_size_samples;
990 if (samples)
991 do_div(sum, samples);
992 else
993 sum = 0;
994 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
995 sum, cb, dname);
996 }
997 if (type == BLKIO_STAT_GROUP_WAIT_TIME)
998 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
999 pd->stats.group_wait_time, cb, dname);
1000 if (type == BLKIO_STAT_IDLE_TIME)
1001 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
1002 pd->stats.idle_time, cb, dname);
1003 if (type == BLKIO_STAT_EMPTY_TIME)
1004 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
1005 pd->stats.empty_time, cb, dname);
1006 if (type == BLKIO_STAT_DEQUEUE)
1007 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
1008 pd->stats.dequeue, cb, dname);
1009 #endif
1010
1011 for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
1012 sub_type++) {
1013 blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
1014 false);
1015 cb->fill(cb, key_str, pd->stats.stat_arr[type][sub_type]);
1016 }
1017 disk_total = pd->stats.stat_arr[type][BLKIO_STAT_READ] +
1018 pd->stats.stat_arr[type][BLKIO_STAT_WRITE];
1019 blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
1020 false);
1021 cb->fill(cb, key_str, disk_total);
1022 return disk_total;
1023 }
1024
1025 static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid,
1026 int fileid, struct blkio_cgroup *blkcg)
1027 {
1028 struct gendisk *disk = NULL;
1029 struct blkio_group *blkg = NULL;
1030 struct blkg_policy_data *pd;
1031 char *s[4], *p, *major_s = NULL, *minor_s = NULL;
1032 unsigned long major, minor;
1033 int i = 0, ret = -EINVAL;
1034 int part;
1035 dev_t dev;
1036 u64 temp;
1037
1038 memset(s, 0, sizeof(s));
1039
1040 while ((p = strsep(&buf, " ")) != NULL) {
1041 if (!*p)
1042 continue;
1043
1044 s[i++] = p;
1045
1046 /* Prevent the user from inputting too many fields */
1047 if (i == 3)
1048 break;
1049 }
1050
1051 if (i != 2)
1052 goto out;
1053
1054 p = strsep(&s[0], ":");
1055 if (p != NULL)
1056 major_s = p;
1057 else
1058 goto out;
1059
1060 minor_s = s[0];
1061 if (!minor_s)
1062 goto out;
1063
1064 if (strict_strtoul(major_s, 10, &major))
1065 goto out;
1066
1067 if (strict_strtoul(minor_s, 10, &minor))
1068 goto out;
1069
1070 dev = MKDEV(major, minor);
1071
1072 if (strict_strtoull(s[1], 10, &temp))
1073 goto out;
1074
1075 disk = get_gendisk(dev, &part);
1076 if (!disk || part)
1077 goto out;
1078
1079 rcu_read_lock();
1080
1081 spin_lock_irq(disk->queue->queue_lock);
1082 blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
1083 spin_unlock_irq(disk->queue->queue_lock);
1084
1085 if (IS_ERR(blkg)) {
1086 ret = PTR_ERR(blkg);
1087 goto out_unlock;
1088 }
1089
1090 pd = blkg->pd[plid];
1091
1092 switch (plid) {
1093 case BLKIO_POLICY_PROP:
1094 if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
1095 temp > BLKIO_WEIGHT_MAX)
1096 goto out_unlock;
1097
1098 pd->conf.weight = temp;
1099 blkio_update_group_weight(blkg, plid, temp ?: blkcg->weight);
1100 break;
1101 case BLKIO_POLICY_THROTL:
1102 switch (fileid) {
1103 case BLKIO_THROTL_read_bps_device:
1104 pd->conf.bps[READ] = temp;
1105 blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
1106 break;
1107 case BLKIO_THROTL_write_bps_device:
1108 pd->conf.bps[WRITE] = temp;
1109 blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
1110 break;
1111 case BLKIO_THROTL_read_iops_device:
1112 if (temp > THROTL_IOPS_MAX)
1113 goto out_unlock;
1114 pd->conf.iops[READ] = temp;
1115 blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
1116 break;
1117 case BLKIO_THROTL_write_iops_device:
1118 if (temp > THROTL_IOPS_MAX)
1119 goto out_unlock;
1120 pd->conf.iops[WRITE] = temp;
1121 blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
1122 break;
1123 }
1124 break;
1125 default:
1126 BUG();
1127 }
1128 ret = 0;
1129 out_unlock:
1130 rcu_read_unlock();
1131 out:
1132 put_disk(disk);
1133
1134 /*
1135 * If the queue was bypassing, we should retry. Do so after a short
1136 * msleep(). It isn't strictly necessary but the queue can be
1137 * bypassing for some time and it's always nice to avoid busy
1138 * looping.
1139 */
1140 if (ret == -EBUSY) {
1141 msleep(10);
1142 return restart_syscall();
1143 }
1144 return ret;
1145 }
1146
1147 static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
1148 const char *buffer)
1149 {
1150 int ret = 0;
1151 char *buf;
1152 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
1153 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1154 int fileid = BLKIOFILE_ATTR(cft->private);
1155
1156 buf = kstrdup(buffer, GFP_KERNEL);
1157 if (!buf)
1158 return -ENOMEM;
1159
1160 ret = blkio_policy_parse_and_set(buf, plid, fileid, blkcg);
1161 kfree(buf);
1162 return ret;
1163 }
1164
1165 static const char *blkg_dev_name(struct blkio_group *blkg)
1166 {
1167 /* some drivers (floppy) instantiate a queue w/o disk registered */
1168 if (blkg->q->backing_dev_info.dev)
1169 return dev_name(blkg->q->backing_dev_info.dev);
1170 return NULL;
1171 }
1172
1173 static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg,
1174 struct seq_file *m)
1175 {
1176 int plid = BLKIOFILE_POLICY(cft->private);
1177 int fileid = BLKIOFILE_ATTR(cft->private);
1178 struct blkg_policy_data *pd = blkg->pd[plid];
1179 const char *dname = blkg_dev_name(blkg);
1180 int rw = WRITE;
1181
1182 if (!dname)
1183 return;
1184
1185 switch (plid) {
1186 case BLKIO_POLICY_PROP:
1187 if (pd->conf.weight)
1188 seq_printf(m, "%s\t%u\n",
1189 dname, pd->conf.weight);
1190 break;
1191 case BLKIO_POLICY_THROTL:
1192 switch (fileid) {
1193 case BLKIO_THROTL_read_bps_device:
1194 rw = READ;
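/* fall through: shares the printing code of the write case below */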
1195 case BLKIO_THROTL_write_bps_device:
1196 if (pd->conf.bps[rw])
1197 seq_printf(m, "%s\t%llu\n",
1198 dname, pd->conf.bps[rw]);
1199 break;
1200 case BLKIO_THROTL_read_iops_device:
1201 rw = READ;
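/* fall through: shares the printing code of the write case below */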
1202 case BLKIO_THROTL_write_iops_device:
1203 if (pd->conf.iops[rw])
1204 seq_printf(m, "%s\t%u\n",
1205 dname, pd->conf.iops[rw]);
1206 break;
1207 }
1208 break;
1209 default:
1210 BUG();
1211 }
1212 }
1213
1214 /* cgroup files which read their data from policy nodes end up here */
1215 static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg,
1216 struct seq_file *m)
1217 {
1218 struct blkio_group *blkg;
1219 struct hlist_node *n;
1220
1221 spin_lock_irq(&blkcg->lock);
1222 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
1223 blkio_print_group_conf(cft, blkg, m);
1224 spin_unlock_irq(&blkcg->lock);
1225 }
1226
1227 static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1228 struct seq_file *m)
1229 {
1230 struct blkio_cgroup *blkcg;
1231 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1232 int name = BLKIOFILE_ATTR(cft->private);
1233
1234 blkcg = cgroup_to_blkio_cgroup(cgrp);
1235
1236 switch (plid) {
1237 case BLKIO_POLICY_PROP:
1238 switch (name) {
1239 case BLKIO_PROP_weight_device:
1240 blkio_read_conf(cft, blkcg, m);
1241 return 0;
1242 default:
1243 BUG();
1244 }
1245 break;
1246 case BLKIO_POLICY_THROTL:
1247 switch (name) {
1248 case BLKIO_THROTL_read_bps_device:
1249 case BLKIO_THROTL_write_bps_device:
1250 case BLKIO_THROTL_read_iops_device:
1251 case BLKIO_THROTL_write_iops_device:
1252 blkio_read_conf(cft, blkcg, m);
1253 return 0;
1254 default:
1255 BUG();
1256 }
1257 break;
1258 default:
1259 BUG();
1260 }
1261
1262 return 0;
1263 }
1264
1265 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
1266 struct cftype *cft, struct cgroup_map_cb *cb,
1267 enum stat_type type, bool show_total, bool pcpu)
1268 {
1269 struct blkio_group *blkg;
1270 struct hlist_node *n;
1271 uint64_t cgroup_total = 0;
1272
1273 spin_lock_irq(&blkcg->lock);
1274
1275 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1276 const char *dname = blkg_dev_name(blkg);
1277 int plid = BLKIOFILE_POLICY(cft->private);
1278
1279 if (!dname)
1280 continue;
1281 if (pcpu) {
1282 cgroup_total += blkio_get_stat_cpu(blkg, plid,
1283 cb, dname, type);
1284 } else {
1285 spin_lock(&blkg->stats_lock);
1286 cgroup_total += blkio_get_stat(blkg, plid,
1287 cb, dname, type);
1288 spin_unlock(&blkg->stats_lock);
1289 }
1290 }
1291 if (show_total)
1292 cb->fill(cb, "Total", cgroup_total);
1293
1294 spin_unlock_irq(&blkcg->lock);
1295 return 0;
1296 }
1297
1298 /* All map-type cgroup files are serviced by this function */
1299 static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1300 struct cgroup_map_cb *cb)
1301 {
1302 struct blkio_cgroup *blkcg;
1303 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1304 int name = BLKIOFILE_ATTR(cft->private);
1305
1306 blkcg = cgroup_to_blkio_cgroup(cgrp);
1307
1308 switch (plid) {
1309 case BLKIO_POLICY_PROP:
1310 switch (name) {
1311 case BLKIO_PROP_time:
1312 return blkio_read_blkg_stats(blkcg, cft, cb,
1313 BLKIO_STAT_TIME, 0, 0);
1314 case BLKIO_PROP_sectors:
1315 return blkio_read_blkg_stats(blkcg, cft, cb,
1316 BLKIO_STAT_CPU_SECTORS, 0, 1);
1317 case BLKIO_PROP_io_service_bytes:
1318 return blkio_read_blkg_stats(blkcg, cft, cb,
1319 BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
1320 case BLKIO_PROP_io_serviced:
1321 return blkio_read_blkg_stats(blkcg, cft, cb,
1322 BLKIO_STAT_CPU_SERVICED, 1, 1);
1323 case BLKIO_PROP_io_service_time:
1324 return blkio_read_blkg_stats(blkcg, cft, cb,
1325 BLKIO_STAT_SERVICE_TIME, 1, 0);
1326 case BLKIO_PROP_io_wait_time:
1327 return blkio_read_blkg_stats(blkcg, cft, cb,
1328 BLKIO_STAT_WAIT_TIME, 1, 0);
1329 case BLKIO_PROP_io_merged:
1330 return blkio_read_blkg_stats(blkcg, cft, cb,
1331 BLKIO_STAT_MERGED, 1, 0);
1332 case BLKIO_PROP_io_queued:
1333 return blkio_read_blkg_stats(blkcg, cft, cb,
1334 BLKIO_STAT_QUEUED, 1, 0);
1335 #ifdef CONFIG_DEBUG_BLK_CGROUP
1336 case BLKIO_PROP_unaccounted_time:
1337 return blkio_read_blkg_stats(blkcg, cft, cb,
1338 BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
1339 case BLKIO_PROP_dequeue:
1340 return blkio_read_blkg_stats(blkcg, cft, cb,
1341 BLKIO_STAT_DEQUEUE, 0, 0);
1342 case BLKIO_PROP_avg_queue_size:
1343 return blkio_read_blkg_stats(blkcg, cft, cb,
1344 BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
1345 case BLKIO_PROP_group_wait_time:
1346 return blkio_read_blkg_stats(blkcg, cft, cb,
1347 BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
1348 case BLKIO_PROP_idle_time:
1349 return blkio_read_blkg_stats(blkcg, cft, cb,
1350 BLKIO_STAT_IDLE_TIME, 0, 0);
1351 case BLKIO_PROP_empty_time:
1352 return blkio_read_blkg_stats(blkcg, cft, cb,
1353 BLKIO_STAT_EMPTY_TIME, 0, 0);
1354 #endif
1355 default:
1356 BUG();
1357 }
1358 break;
1359 case BLKIO_POLICY_THROTL:
1360 switch (name) {
1361 case BLKIO_THROTL_io_service_bytes:
1362 return blkio_read_blkg_stats(blkcg, cft, cb,
1363 BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
1364 case BLKIO_THROTL_io_serviced:
1365 return blkio_read_blkg_stats(blkcg, cft, cb,
1366 BLKIO_STAT_CPU_SERVICED, 1, 1);
1367 default:
1368 BUG();
1369 }
1370 break;
1371 default:
1372 BUG();
1373 }
1374
1375 return 0;
1376 }
1377
1378 static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val)
1379 {
1380 struct blkio_group *blkg;
1381 struct hlist_node *n;
1382
1383 if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1384 return -EINVAL;
1385
1386 spin_lock(&blkio_list_lock);
1387 spin_lock_irq(&blkcg->lock);
1388 blkcg->weight = (unsigned int)val;
1389
1390 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1391 struct blkg_policy_data *pd = blkg->pd[plid];
1392
1393 if (!pd->conf.weight)
1394 blkio_update_group_weight(blkg, plid, blkcg->weight);
1395 }
1396
1397 spin_unlock_irq(&blkcg->lock);
1398 spin_unlock(&blkio_list_lock);
1399 return 0;
1400 }
1401
1402 static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1403 struct blkio_cgroup *blkcg;
1404 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1405 int name = BLKIOFILE_ATTR(cft->private);
1406
1407 blkcg = cgroup_to_blkio_cgroup(cgrp);
1408
1409 switch (plid) {
1410 case BLKIO_POLICY_PROP:
1411 switch (name) {
1412 case BLKIO_PROP_weight:
1413 return (u64)blkcg->weight;
1414 }
1415 break;
1416 default:
1417 BUG();
1418 }
1419 return 0;
1420 }
1421
1422 static int
1423 blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1424 {
1425 struct blkio_cgroup *blkcg;
1426 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1427 int name = BLKIOFILE_ATTR(cft->private);
1428
1429 blkcg = cgroup_to_blkio_cgroup(cgrp);
1430
1431 switch (plid) {
1432 case BLKIO_POLICY_PROP:
1433 switch (name) {
1434 case BLKIO_PROP_weight:
1435 return blkio_weight_write(blkcg, plid, val);
1436 }
1437 break;
1438 default:
1439 BUG();
1440 }
1441
1442 return 0;
1443 }
1444
1445 struct cftype blkio_files[] = {
1446 {
1447 .name = "weight_device",
1448 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1449 BLKIO_PROP_weight_device),
1450 .read_seq_string = blkiocg_file_read,
1451 .write_string = blkiocg_file_write,
1452 .max_write_len = 256,
1453 },
1454 {
1455 .name = "weight",
1456 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1457 BLKIO_PROP_weight),
1458 .read_u64 = blkiocg_file_read_u64,
1459 .write_u64 = blkiocg_file_write_u64,
1460 },
1461 {
1462 .name = "time",
1463 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1464 BLKIO_PROP_time),
1465 .read_map = blkiocg_file_read_map,
1466 },
1467 {
1468 .name = "sectors",
1469 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1470 BLKIO_PROP_sectors),
1471 .read_map = blkiocg_file_read_map,
1472 },
1473 {
1474 .name = "io_service_bytes",
1475 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1476 BLKIO_PROP_io_service_bytes),
1477 .read_map = blkiocg_file_read_map,
1478 },
1479 {
1480 .name = "io_serviced",
1481 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1482 BLKIO_PROP_io_serviced),
1483 .read_map = blkiocg_file_read_map,
1484 },
1485 {
1486 .name = "io_service_time",
1487 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1488 BLKIO_PROP_io_service_time),
1489 .read_map = blkiocg_file_read_map,
1490 },
1491 {
1492 .name = "io_wait_time",
1493 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1494 BLKIO_PROP_io_wait_time),
1495 .read_map = blkiocg_file_read_map,
1496 },
1497 {
1498 .name = "io_merged",
1499 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1500 BLKIO_PROP_io_merged),
1501 .read_map = blkiocg_file_read_map,
1502 },
1503 {
1504 .name = "io_queued",
1505 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1506 BLKIO_PROP_io_queued),
1507 .read_map = blkiocg_file_read_map,
1508 },
1509 {
1510 .name = "reset_stats",
1511 .write_u64 = blkiocg_reset_stats,
1512 },
1513 #ifdef CONFIG_BLK_DEV_THROTTLING
1514 {
1515 .name = "throttle.read_bps_device",
1516 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1517 BLKIO_THROTL_read_bps_device),
1518 .read_seq_string = blkiocg_file_read,
1519 .write_string = blkiocg_file_write,
1520 .max_write_len = 256,
1521 },
1522
1523 {
1524 .name = "throttle.write_bps_device",
1525 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1526 BLKIO_THROTL_write_bps_device),
1527 .read_seq_string = blkiocg_file_read,
1528 .write_string = blkiocg_file_write,
1529 .max_write_len = 256,
1530 },
1531
1532 {
1533 .name = "throttle.read_iops_device",
1534 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1535 BLKIO_THROTL_read_iops_device),
1536 .read_seq_string = blkiocg_file_read,
1537 .write_string = blkiocg_file_write,
1538 .max_write_len = 256,
1539 },
1540
1541 {
1542 .name = "throttle.write_iops_device",
1543 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1544 BLKIO_THROTL_write_iops_device),
1545 .read_seq_string = blkiocg_file_read,
1546 .write_string = blkiocg_file_write,
1547 .max_write_len = 256,
1548 },
1549 {
1550 .name = "throttle.io_service_bytes",
1551 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1552 BLKIO_THROTL_io_service_bytes),
1553 .read_map = blkiocg_file_read_map,
1554 },
1555 {
1556 .name = "throttle.io_serviced",
1557 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1558 BLKIO_THROTL_io_serviced),
1559 .read_map = blkiocg_file_read_map,
1560 },
1561 #endif /* CONFIG_BLK_DEV_THROTTLING */
1562
1563 #ifdef CONFIG_DEBUG_BLK_CGROUP
1564 {
1565 .name = "avg_queue_size",
1566 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1567 BLKIO_PROP_avg_queue_size),
1568 .read_map = blkiocg_file_read_map,
1569 },
1570 {
1571 .name = "group_wait_time",
1572 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1573 BLKIO_PROP_group_wait_time),
1574 .read_map = blkiocg_file_read_map,
1575 },
1576 {
1577 .name = "idle_time",
1578 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1579 BLKIO_PROP_idle_time),
1580 .read_map = blkiocg_file_read_map,
1581 },
1582 {
1583 .name = "empty_time",
1584 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1585 BLKIO_PROP_empty_time),
1586 .read_map = blkiocg_file_read_map,
1587 },
1588 {
1589 .name = "dequeue",
1590 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1591 BLKIO_PROP_dequeue),
1592 .read_map = blkiocg_file_read_map,
1593 },
1594 {
1595 .name = "unaccounted_time",
1596 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1597 BLKIO_PROP_unaccounted_time),
1598 .read_map = blkiocg_file_read_map,
1599 },
1600 #endif
1601 };
1602
1603 static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1604 {
1605 return cgroup_add_files(cgroup, subsys, blkio_files,
1606 ARRAY_SIZE(blkio_files));
1607 }
1608
1609 /**
1610 * blkiocg_pre_destroy - cgroup pre_destroy callback
1611 * @subsys: cgroup subsys
1612 * @cgroup: cgroup of interest
1613 *
1614 * This function is called when @cgroup is about to go away and is responsible
1615 * for shooting down all blkgs associated with @cgroup. blkgs should be
1616 * removed while holding both q and blkcg locks. As blkcg lock is nested
1617 * inside q lock, this function performs reverse double lock dancing.
1618 *
1619 * This is the blkcg counterpart of ioc_release_fn().
1620 */
1621 static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
1622 struct cgroup *cgroup)
1623 {
1624 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1625
1626 spin_lock_irq(&blkcg->lock);
1627
1628 while (!hlist_empty(&blkcg->blkg_list)) {
1629 struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
1630 struct blkio_group, blkcg_node);
1631 struct request_queue *q = blkg->q;
1632
1633 if (spin_trylock(q->queue_lock)) {
1634 blkg_destroy(blkg);
1635 spin_unlock(q->queue_lock);
1636 } else {
1637 spin_unlock_irq(&blkcg->lock);
1638 cpu_relax();
1639 spin_lock(&blkcg->lock);
1640 }
1641 }
1642
1643 spin_unlock_irq(&blkcg->lock);
1644 return 0;
1645 }
1646
1647 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1648 {
1649 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1650
1651 if (blkcg != &blkio_root_cgroup)
1652 kfree(blkcg);
1653 }
1654
1655 static struct cgroup_subsys_state *
1656 blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1657 {
1658 struct blkio_cgroup *blkcg;
1659 struct cgroup *parent = cgroup->parent;
1660
1661 if (!parent) {
1662 blkcg = &blkio_root_cgroup;
1663 goto done;
1664 }
1665
1666 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1667 if (!blkcg)
1668 return ERR_PTR(-ENOMEM);
1669
1670 blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1671 done:
1672 spin_lock_init(&blkcg->lock);
1673 INIT_HLIST_HEAD(&blkcg->blkg_list);
1674
1675 return &blkcg->css;
1676 }
1677
1678 /**
1679 * blkcg_init_queue - initialize blkcg part of request queue
1680 * @q: request_queue to initialize
1681 *
1682 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
1683 * part of new request_queue @q.
1684 *
1685 * RETURNS:
1686 * 0 on success, -errno on failure.
1687 */
1688 int blkcg_init_queue(struct request_queue *q)
1689 {
1690 int ret;
1691
1692 might_sleep();
1693
1694 ret = blk_throtl_init(q);
1695 if (ret)
1696 return ret;
1697
1698 mutex_lock(&all_q_mutex);
1699 INIT_LIST_HEAD(&q->all_q_node);
1700 list_add_tail(&q->all_q_node, &all_q_list);
1701 mutex_unlock(&all_q_mutex);
1702
1703 return 0;
1704 }
1705
1706 /**
1707 * blkcg_drain_queue - drain blkcg part of request_queue
1708 * @q: request_queue to drain
1709 *
1710 * Called from blk_drain_queue(). Responsible for draining blkcg part.
1711 */
1712 void blkcg_drain_queue(struct request_queue *q)
1713 {
1714 lockdep_assert_held(q->queue_lock);
1715
1716 blk_throtl_drain(q);
1717 }
1718
1719 /**
1720 * blkcg_exit_queue - exit and release blkcg part of request_queue
1721 * @q: request_queue being released
1722 *
1723 * Called from blk_release_queue(). Responsible for exiting blkcg part.
1724 */
1725 void blkcg_exit_queue(struct request_queue *q)
1726 {
1727 mutex_lock(&all_q_mutex);
1728 list_del_init(&q->all_q_node);
1729 mutex_unlock(&all_q_mutex);
1730
1731 blkg_destroy_all(q, true);
1732
1733 blk_throtl_exit(q);
1734 }
1735
1736 /*
1737 * We cannot support shared io contexts, as we have no means to support
1738 * two tasks with the same ioc in two different groups without major rework
1739 * of the main cic data structures. For now we allow a task to change
1740 * its cgroup only if it's the only owner of its ioc.
1741 */
1742 static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1743 struct cgroup_taskset *tset)
1744 {
1745 struct task_struct *task;
1746 struct io_context *ioc;
1747 int ret = 0;
1748
1749 /* task_lock() is needed to avoid races with exit_io_context() */
1750 cgroup_taskset_for_each(task, cgrp, tset) {
1751 task_lock(task);
1752 ioc = task->io_context;
1753 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1754 ret = -EINVAL;
1755 task_unlock(task);
1756 if (ret)
1757 break;
1758 }
1759 return ret;
1760 }
1761
1762 static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1763 struct cgroup_taskset *tset)
1764 {
1765 struct task_struct *task;
1766 struct io_context *ioc;
1767
1768 cgroup_taskset_for_each(task, cgrp, tset) {
1769 /* we don't lose anything even if ioc allocation fails */
1770 ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
1771 if (ioc) {
1772 ioc_cgroup_changed(ioc);
1773 put_io_context(ioc);
1774 }
1775 }
1776 }
1777
1778 static void blkcg_bypass_start(void)
1779 __acquires(&all_q_mutex)
1780 {
1781 struct request_queue *q;
1782
1783 mutex_lock(&all_q_mutex);
1784
1785 list_for_each_entry(q, &all_q_list, all_q_node) {
1786 blk_queue_bypass_start(q);
1787 blkg_destroy_all(q, false);
1788 }
1789 }
1790
1791 static void blkcg_bypass_end(void)
1792 __releases(&all_q_mutex)
1793 {
1794 struct request_queue *q;
1795
1796 list_for_each_entry(q, &all_q_list, all_q_node)
1797 blk_queue_bypass_end(q);
1798
1799 mutex_unlock(&all_q_mutex);
1800 }
1801
1802 void blkio_policy_register(struct blkio_policy_type *blkiop)
1803 {
1804 struct request_queue *q;
1805
1806 blkcg_bypass_start();
1807 spin_lock(&blkio_list_lock);
1808
1809 BUG_ON(blkio_policy[blkiop->plid]);
1810 blkio_policy[blkiop->plid] = blkiop;
1811 list_add_tail(&blkiop->list, &blkio_list);
1812
1813 spin_unlock(&blkio_list_lock);
1814 list_for_each_entry(q, &all_q_list, all_q_node)
1815 update_root_blkg_pd(q, blkiop->plid);
1816 blkcg_bypass_end();
1817 }
1818 EXPORT_SYMBOL_GPL(blkio_policy_register);
1819
1820 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1821 {
1822 struct request_queue *q;
1823
1824 blkcg_bypass_start();
1825 spin_lock(&blkio_list_lock);
1826
1827 BUG_ON(blkio_policy[blkiop->plid] != blkiop);
1828 blkio_policy[blkiop->plid] = NULL;
1829 list_del_init(&blkiop->list);
1830
1831 spin_unlock(&blkio_list_lock);
1832 list_for_each_entry(q, &all_q_list, all_q_node)
1833 update_root_blkg_pd(q, blkiop->plid);
1834 blkcg_bypass_end();
1835 }
1836 EXPORT_SYMBOL_GPL(blkio_policy_unregister);