/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,
	BLKG_RWSTAT_DISCARD,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL	= BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
	refcount_t			cgwb_refcnt;
#endif
};

/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive.  Used to carry stats of dead children, and, for blkg_rwstat,
 * to carry result values from read and sum operations.
 */
struct blkg_stat {
	struct percpu_counter		cpu_cnt;
	atomic64_t			aux_cnt;
};

struct blkg_rwstat {
	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
	bool				offline;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;

	/* reference count */
	atomic_t			refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_rwstat		stat_bytes;
	struct blkg_rwstat		stat_ios;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

	struct rcu_head			rcu_head;

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
				      size_t size);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

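/*
 * Illustrative sketch (not part of the original header): a minimal policy
 * might be wired up as below.  The names my_pol, my_pd_alloc and my_pd_free
 * are hypothetical stand-ins for a real policy's callbacks.
 *
 *	static struct blkcg_policy my_pol = {
 *		.pd_alloc_fn	= my_pd_alloc,
 *		.pd_free_fn	= my_pd_free,
 *	};
 *
 *	ret = blkcg_policy_register(&my_pol);	// assigns my_pol.plid
 *	if (!ret)
 *		ret = blkcg_activate_policy(q, &my_pol);
 */
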
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat);
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off);

int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
			    struct blkcg_policy *pol, int off);
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
					     struct blkcg_policy *pol, int off);

struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

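/*
 * Illustrative sketch (not part of the original header): a policy's cftype
 * write handler typically brackets its configuration update with these two
 * helpers.  my_pol is hypothetical.
 *
 *	struct blkg_conf_ctx ctx;
 *	int ret;
 *
 *	ret = blkg_conf_prep(blkcg, &my_pol, buf, &ctx);
 *	if (ret)
 *		return ret;
 *	// ctx.blkg is valid here; ctx.body points past the "MAJ:MIN" prefix
 *	blkg_conf_finish(&ctx);	// releases what blkg_conf_prep acquired
 */
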
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	struct cgroup_subsys_state *css;

	if (bio && bio->bi_css)
		return css_to_blkcg(bio->bi_css);
	css = kthread_blkcg();
	if (css)
		return css_to_blkcg(css);
	return css_to_blkcg(task_css(current, io_cgrp_id));
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: the bio of interest
 * @return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio.  Then we call this helper and
 * if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}

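/*
 * Illustrative sketch (not part of the original header): a typical lookup
 * runs under the RCU read lock and must not use the result after unlock
 * unless a reference is taken (see blkg_try_get() below).
 *
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(bio_blkcg(bio), q);
 *	if (blkg) {
 *		// safe to use blkg within this RCU critical section
 *	}
 *	rcu_read_unlock();
 */
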
/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

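/*
 * Illustrative sketch (not part of the original header): a policy embeds
 * blkg_policy_data at the start of its own structure and converts back with
 * container_of().  struct my_blkg_data and my_pd() are hypothetical.
 *
 *	struct my_blkg_data {
 *		struct blkg_policy_data	pd;	// must be first
 *		u64			nr_issued;
 *	};
 *
 *	static inline struct my_blkg_data *my_pd(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct my_blkg_data, pd) : NULL;
 *	}
 */
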
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 */
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->cgwb_refcnt);
}

/**
 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 * When this count goes to zero, all active wb's have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 * This work may occur in cgwb_release_workfn() on the cgwb_release
 * workqueue.
 */
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
		blkcg_destroy_blkgs(blkcg);
}

#else

static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	/* wb isn't being accounted, so trigger destruction right away */
	blkcg_destroy_blkgs(blkcg);
}

#endif

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	atomic_inc(&blkg->refcnt);
}

/**
 * blkg_try_get - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 */
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
	if (atomic_inc_not_zero(&blkg->refcnt))
		return blkg;
	return NULL;
}

void __blkg_release_rcu(struct rcu_head *rcu);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	if (atomic_dec_and_test(&blkg->refcnt))
		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}

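/*
 * Illustrative sketch (not part of the original header): pinning a blkg
 * found under RCU so it can be used after the critical section.
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg)
 *		blkg = blkg_try_get(blkg);	// NULL if already being freed
 *	rcu_read_unlock();
 *
 *	if (blkg) {
 *		// ... use blkg ...
 *		blkg_put(blkg);
 *	}
 */
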
/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip the subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

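/*
 * Illustrative sketch (not part of the original header): visiting a blkg and
 * all of its online descendants under RCU.  my_update() is hypothetical.
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *d_blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(d_blkg, pos_css, blkg)
 *		my_update(d_blkg);
 *	rcu_read_unlock();
 */
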
/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return a non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg))
		goto root_rl;

	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;

root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	if (rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);

/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 *
 * Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))

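/*
 * Illustrative sketch (not part of the original header): walking every
 * request_list of a queue, e.g. to wake up waiters, under queue_lock.
 *
 *	struct request_list *rl;
 *
 *	spin_lock_irq(q->queue_lock);
 *	blk_queue_for_each_rl(rl, q) {
 *		// ... operate on rl ...
 *	}
 *	spin_unlock_irq(q->queue_lock);
 */
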
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static inline void blkg_stat_exit(struct blkg_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller must ensure that IRQs on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

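/*
 * Illustrative sketch (not part of the original header): lifecycle of a
 * blkg_stat owned by a policy's private data.
 *
 *	struct blkg_stat st;
 *
 *	if (blkg_stat_init(&st, GFP_KERNEL))
 *		return -ENOMEM;
 *	blkg_stat_add(&st, 1);			// count one event
 *	pr_debug("count=%llu\n", blkg_stat_read(&st));
 *	blkg_stat_exit(&st);
 */
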
/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

/**
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_stat_add_aux(struct blkg_stat *to,
				     struct blkg_stat *from)
{
	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @op.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_discard(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
	else if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_is_sync(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}

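/*
 * Illustrative sketch (not part of the original header): charging a bio's
 * size into a policy-owned rwstat; the op bits select the READ/WRITE/DISCARD
 * and SYNC/ASYNC buckets as implemented above.  my_rwstat is hypothetical.
 *
 *	blkg_rwstat_add(&my_rwstat, bio->bi_opf, bio->bi_iter.bi_size);
 */
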
/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it in the aux counts.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat result;
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&result.aux_cnt[i],
			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
	return result;
}

/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 sum[BLKG_RWSTAT_NR];
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
			     &to->aux_cnt[i]);
}

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);

	/* associate blkcg if bio hasn't attached one */
	bio_associate_blkcg(bio, &blkcg->css);

	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg)) {
		spin_lock_irq(q->queue_lock);
		blkg = blkg_lookup_create(blkcg, q);
		if (IS_ERR(blkg))
			blkg = NULL;
		spin_unlock_irq(q->queue_lock);
	}

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		blkg = blkg ?: q->root_blkg;
		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	}

	rcu_read_unlock();
	return !throtl;
}

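/*
 * Illustrative sketch (not part of the original header, and the call site is
 * an assumption): submission paths gate each bio on this check.
 *
 *	if (!blkcg_bio_issue_check(q, bio))
 *		return;		// bio was throttled and queued by the throttler
 */
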
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (!old)
		return;

	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);

#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */