/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include "blk-cgroup.h"
#include <linux/genhd.h>
#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
static struct cgroup_subsys_state *blkiocg_create(struct cgroup *);
static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *);
static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *);
static void blkiocg_destroy(struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
/* for encoding cft->private value on file */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
/* What policy owns the file, proportional or throttle */
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)
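/*
 * Example: BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_read_bps_device)
 * packs the owning policy id into the upper 16 bits of cft->private and the
 * per-policy file attribute into the lower 16 bits, so one set of cftype
 * handlers can recover both with BLKIOFILE_POLICY() and BLKIOFILE_ATTR().
 */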
struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
#ifdef CONFIG_BLK_CGROUP
	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
	.subsys_id = blkio_subsys_id,
#endif
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);
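/*
 * The cgroup core drives the callbacks above: blkiocg_create()/blkiocg_destroy()
 * on cgroup creation and removal, blkiocg_populate() to add the blkio.* control
 * files, and blkiocg_can_attach()/blkiocg_attach() when tasks are moved into a
 * cgroup.
 */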
static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
					    struct blkio_policy_node *pn)
{
	list_add(&pn->node, &blkcg->policy_list);
}
static inline bool cftype_blkg_same_policy(struct cftype *cft,
					   struct blkio_group *blkg)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);

	if (blkg->plid == plid)
		return 1;

	return 0;
}
/* Determines if policy node matches cgroup file being accessed */
static inline bool pn_matches_cftype(struct cftype *cft,
				     struct blkio_policy_node *pn)
{
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	return (plid == pn->plid && fileid == pn->fileid);
}
/* Must be called with blkcg->lock held */
static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
{
	list_del(&pn->node);
}
/* Must be called with blkcg->lock held */
static struct blkio_policy_node *
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
		enum blkio_policy_id plid, int fileid)
{
	struct blkio_policy_node *pn;

	list_for_each_entry(pn, &blkcg->policy_list, node) {
		if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
			return pn;
	}

	return NULL;
}
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(task_blkio_cgroup);
static inline void
blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->key,
							blkg, weight);
	}
}
static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
					  int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_bps_device
		    && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
								blkg, bps);

		if (fileid == BLKIO_THROTL_write_bps_device
		    && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
								blkg, bps);
	}
}
static inline void blkio_update_group_iops(struct blkio_group *blkg,
					   unsigned int iops, int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != blkg->plid)
			continue;

		if (fileid == BLKIO_THROTL_read_iops_device
		    && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
								blkg, iops);

		if (fileid == BLKIO_THROTL_write_iops_device
		    && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
								blkg, iops);
	}
}
/*
 * Add to the appropriate stat variable depending on the request type.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
				bool sync)
{
	if (direction)
		stat[BLKIO_STAT_WRITE] += add;
	else
		stat[BLKIO_STAT_READ] += add;
	if (sync)
		stat[BLKIO_STAT_SYNC] += add;
	else
		stat[BLKIO_STAT_ASYNC] += add;
}
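/*
 * Each request is thus accounted twice by blkio_add_stat() above: once under
 * its direction (READ or WRITE) and once under its synchronicity (SYNC or
 * ASYNC). The "Total" value is computed by the readers, not stored here.
 */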
/*
 * Decrements the appropriate stat variable if non-zero depending on the
 * request type. Panics on value being zero.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	if (direction) {
		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
		stat[BLKIO_STAT_WRITE]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_READ] == 0);
		stat[BLKIO_STAT_READ]--;
	}
	if (sync) {
		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
		stat[BLKIO_STAT_SYNC]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
		stat[BLKIO_STAT_ASYNC]--;
	}
}
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the blkg->stats_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_group *curr_blkg)
{
	if (blkio_blkg_waiting(&blkg->stats))
		return;
	if (blkg == curr_blkg)
		return;
	blkg->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&blkg->stats);
}
/* This should be called with the blkg->stats_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}
/* This should be called with the blkg->stats_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		stats->empty_time += now - stats->start_empty_time;
	blkio_clear_blkg_empty(stats);
}
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	BUG_ON(blkio_blkg_idling(&blkg->stats));
	blkg->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&blkg->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->avg_queue_size_sum +=
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
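/*
 * The running sum and sample count collected above become the value reported
 * by the avg_queue_size file in blkio_get_stat(), which divides
 * avg_queue_size_sum by avg_queue_size_samples.
 */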
void blkiocg_set_start_empty_time(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;

	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	/*
	 * group is already marked empty. This can happen if cfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats)) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  unsigned long dequeue)
{
	blkg->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_group *curr_blkg) {}
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
#endif
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_group *curr_blkg, bool direction,
			bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
			sync);
	blkio_end_empty_time(&blkg->stats);
	blkio_set_start_group_wait_time(blkg, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    bool direction, bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
					direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
				unsigned long unaccounted_time)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkg->stats.time += time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	blkg->stats.unaccounted_time += unaccounted_time;
#endif
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
/*
 * should be called under rcu read lock or queue lock to make sure blkg pointer
 * is valid.
 */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				uint64_t bytes, bool direction, bool sync)
{
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(blkg->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	stats_cpu->sectors += bytes >> 9;
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
			1, direction, sync);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
			bytes, direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
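/*
 * Dispatch accounting is kept in per-cpu counters so the fast path takes no
 * shared lock; "bytes >> 9" converts the byte count into 512-byte sectors.
 * The u64_stats_update_begin()/end() pair lets 32-bit readers detect a torn
 * 64-bit update and retry (see blkio_read_stat_cpu()).
 */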
void blkiocg_update_completion_stats(struct blkio_group *blkg,
	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
				now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
				io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
/* Merged stats are per cpu. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
				    bool sync)
{
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(blkg->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
				direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
/*
 * This function allocates the per cpu stats for blkio_group. Should be called
 * from sleepable context as alloc_percpu() requires that.
 */
int blkio_alloc_blkg_stats(struct blkio_group *blkg)
{
	/* Allocate memory for per cpu stats */
	blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
	if (!blkg->stats_cpu)
		return -ENOMEM;
	return 0;
}
EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
			struct blkio_group *blkg, void *key, dev_t dev,
			enum blkio_policy_id plid)
{
	unsigned long flags;

	spin_lock_irqsave(&blkcg->lock, flags);
	spin_lock_init(&blkg->stats_lock);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	blkg->plid = plid;
	spin_unlock_irqrestore(&blkcg->lock, flags);
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
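/*
 * A blkio_group ties a cgroup to one policy instance on one device: the
 * opaque "key" identifies the owning policy's per-queue data, "plid" records
 * which policy owns the group, and "dev" is the device the stats and rules
 * apply to.
 */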
static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
}
/*
 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
 * indicating that blk_group was unhashed by the time we got to it.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	struct blkio_cgroup *blkcg;
	unsigned long flags;
	struct cgroup_subsys_state *css;
	int ret = 1;

	rcu_read_lock();
	css = css_lookup(&blkio_subsys, blkg->blkcg_id);
	if (css) {
		blkcg = container_of(css, struct blkio_cgroup, css);
		spin_lock_irqsave(&blkcg->lock, flags);
		if (!hlist_unhashed(&blkg->blkcg_node)) {
			__blkiocg_del_blkio_group(blkg);
			ret = 0;
		}
		spin_unlock_irqrestore(&blkcg->lock, flags);
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
/* called under rcu_read_lock(). */
struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (blkg->key == key)
			return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
static void blkio_reset_stats_cpu(struct blkio_group *blkg)
{
	struct blkio_group_stats_cpu *stats_cpu;
	int i, j, k;
	/*
	 * Note: On 64 bit arch this should not be an issue. This has the
	 * possibility of returning some inconsistent value on 32bit arch
	 * as 64bit update on 32bit is non atomic. Taking care of this
	 * corner case makes code very complicated, like sending IPIs to
	 * cpus, taking care of stats of offline cpus etc.
	 *
	 * reset stats is anyway more of a debug feature and this sounds a
	 * corner case. So I am not complicating the code yet until and
	 * unless this becomes a real issue.
	 */
	for_each_possible_cpu(i) {
		stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
		stats_cpu->sectors = 0;
		for (j = 0; j < BLKIO_STAT_CPU_NR; j++)
			for (k = 0; k < BLKIO_STAT_TOTAL; k++)
				stats_cpu->stat_arr_cpu[j][k] = 0;
	}
}
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		spin_lock(&blkg->stats_lock);
		stats = &blkg->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
		idling = blkio_blkg_idling(stats);
		waiting = blkio_blkg_waiting(stats);
		empty = blkio_blkg_empty(stats);
#endif
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
		memset(stats, 0, sizeof(struct blkio_group_stats));
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
		if (idling) {
			blkio_mark_blkg_idling(stats);
			stats->start_idle_time = now;
		}
		if (waiting) {
			blkio_mark_blkg_waiting(stats);
			stats->start_group_wait_time = now;
		}
		if (empty) {
			blkio_mark_blkg_empty(stats);
			stats->start_empty_time = now;
		}
#endif
		spin_unlock(&blkg->stats_lock);

		/* Reset Per cpu stats which don't take blkg->stats_lock */
		blkio_reset_stats_cpu(blkg);
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}
static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
				int chars_left, bool diskname_only)
{
	snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
	chars_left -= strlen(str);
	if (chars_left <= 0) {
		printk(KERN_WARNING
			"Possibly incorrect cgroup stat display format");
		return;
	}
	if (diskname_only)
		return;
	switch (type) {
	case BLKIO_STAT_READ:
		strlcat(str, " Read", chars_left);
		break;
	case BLKIO_STAT_WRITE:
		strlcat(str, " Write", chars_left);
		break;
	case BLKIO_STAT_SYNC:
		strlcat(str, " Sync", chars_left);
		break;
	case BLKIO_STAT_ASYNC:
		strlcat(str, " Async", chars_left);
		break;
	case BLKIO_STAT_TOTAL:
		strlcat(str, " Total", chars_left);
		break;
	default:
		strlcat(str, " Invalid", chars_left);
	}
}
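/*
 * The resulting key is what shows up on the left of each stat line, e.g.
 * "8:16 Read", or just "8:16" when diskname_only is true.
 */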
static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, dev_t dev)
{
	blkio_get_key_name(0, dev, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}
static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
			enum stat_type_cpu type, enum stat_sub_type sub_type)
{
	int cpu;
	struct blkio_group_stats_cpu *stats_cpu;
	u64 val = 0, tval;

	for_each_possible_cpu(cpu) {
		unsigned int start;
		stats_cpu = per_cpu_ptr(blkg->stats_cpu, cpu);

		do {
			start = u64_stats_fetch_begin(&stats_cpu->syncp);
			if (type == BLKIO_STAT_CPU_SECTORS)
				tval = stats_cpu->sectors;
			else
				tval = stats_cpu->stat_arr_cpu[type][sub_type];
		} while (u64_stats_fetch_retry(&stats_cpu->syncp, start));

		val += tval;
	}

	return val;
}
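/*
 * Readers sum the per-cpu counters; the fetch_begin/fetch_retry loop simply
 * rereads a cpu's value if a writer updated it in the middle, which keeps the
 * 64-bit counters consistent on 32-bit machines without a reader-side lock.
 */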
static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
		struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
{
	uint64_t disk_total, val;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_CPU_SECTORS) {
		val = blkio_read_stat_cpu(blkg, type, 0);
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
	}

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
		val = blkio_read_stat_cpu(blkg, type, sub_type);
		cb->fill(cb, key_str, val);
	}

	disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
			blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);

	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}
/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg,
		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
{
	uint64_t disk_total;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.time, cb, dev);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.unaccounted_time, cb, dev);
	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
		uint64_t sum = blkg->stats.avg_queue_size_sum;
		uint64_t samples = blkg->stats.avg_queue_size_samples;
		if (samples)
			do_div(sum, samples);
		else
			sum = 0;
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
	}
	if (type == BLKIO_STAT_GROUP_WAIT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.group_wait_time, cb, dev);
	if (type == BLKIO_STAT_IDLE_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.idle_time, cb, dev);
	if (type == BLKIO_STAT_EMPTY_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.empty_time, cb, dev);
	if (type == BLKIO_STAT_DEQUEUE)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					blkg->stats.dequeue, cb, dev);
#endif

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
		cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
	}
	disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
			blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}
static int blkio_policy_parse_and_set(char *buf,
	struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
{
	struct gendisk *disk = NULL;
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	unsigned long major, minor;
	int i = 0, ret = -EINVAL;
	int part;
	dev_t dev;
	u64 temp;

	memset(s, 0, sizeof(s));

	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Prevent inputting too many things */
		if (i == 3)
			break;
	}

	if (i != 2)
		goto out;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		goto out;

	minor_s = s[0];
	if (!minor_s)
		goto out;

	if (strict_strtoul(major_s, 10, &major))
		goto out;

	if (strict_strtoul(minor_s, 10, &minor))
		goto out;

	dev = MKDEV(major, minor);

	if (strict_strtoull(s[1], 10, &temp))
		goto out;

	/* For rule removal, do not check for device presence. */
	if (temp) {
		disk = get_gendisk(dev, &part);
		if (!disk || part) {
			ret = -ENODEV;
			goto out;
		}
	}

	newpn->dev = dev;

	switch (plid) {
	case BLKIO_POLICY_PROP:
		if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
		     temp > BLKIO_WEIGHT_MAX)
			goto out;

		newpn->plid = plid;
		newpn->fileid = fileid;
		newpn->val.weight = temp;
		break;
	case BLKIO_POLICY_THROTL:
		switch (fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.bps = temp;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			if (temp > THROTL_IOPS_MAX)
				goto out;

			newpn->plid = plid;
			newpn->fileid = fileid;
			newpn->val.iops = (unsigned int)temp;
			break;
		}
		break;
	default:
		BUG();
	}
	ret = 0;
out:
	put_disk(disk);
	return ret;
}
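/*
 * The accepted input format is "<major>:<minor> <value>", e.g. writing
 * "8:16 1000000" to blkio.throttle.read_bps_device limits reads on device
 * 8:16 to roughly 1000000 bytes per second.
 */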
unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
			      dev_t dev)
{
	struct blkio_policy_node *pn;
	unsigned long flags;
	unsigned int weight;

	spin_lock_irqsave(&blkcg->lock, flags);

	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device);
	if (pn)
		weight = pn->val.weight;
	else
		weight = blkcg->weight;

	spin_unlock_irqrestore(&blkcg->lock, flags);

	return weight;
}
EXPORT_SYMBOL_GPL(blkcg_get_weight);
uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	unsigned long flags;
	uint64_t bps = -1;

	spin_lock_irqsave(&blkcg->lock, flags);
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device);
	if (pn)
		bps = pn->val.bps;
	spin_unlock_irqrestore(&blkcg->lock, flags);

	return bps;
}
uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	unsigned long flags;
	uint64_t bps = -1;

	spin_lock_irqsave(&blkcg->lock, flags);
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device);
	if (pn)
		bps = pn->val.bps;
	spin_unlock_irqrestore(&blkcg->lock, flags);

	return bps;
}
unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	unsigned long flags;
	unsigned int iops = -1;

	spin_lock_irqsave(&blkcg->lock, flags);
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device);
	if (pn)
		iops = pn->val.iops;
	spin_unlock_irqrestore(&blkcg->lock, flags);

	return iops;
}
unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;
	unsigned long flags;
	unsigned int iops = -1;

	spin_lock_irqsave(&blkcg->lock, flags);
	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device);
	if (pn)
		iops = pn->val.iops;
	spin_unlock_irqrestore(&blkcg->lock, flags);

	return iops;
}
/* Checks whether user asked for deleting a policy rule */
static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->val.weight == 0)
			return 1;
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			if (pn->val.bps == 0)
				return 1;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			if (pn->val.iops == 0)
				return 1;
		}
		break;
	default:
		BUG();
	}

	return 0;
}
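/*
 * A value of 0 is never stored as a rule: blkiocg_file_write() treats it as a
 * request to delete any existing rule for that device and file.
 */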
static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
					struct blkio_policy_node *newpn)
{
	switch (oldpn->plid) {
	case BLKIO_POLICY_PROP:
		oldpn->val.weight = newpn->val.weight;
		break;
	case BLKIO_POLICY_THROTL:
		switch (newpn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			oldpn->val.bps = newpn->val.bps;
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			oldpn->val.iops = newpn->val.iops;
		}
		break;
	default:
		BUG();
	}
}
/*
 * Some rules/values in blkg have changed. Propagate those to respective
 * policies.
 */
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
		struct blkio_group *blkg, struct blkio_policy_node *pn)
{
	unsigned int weight, iops;
	u64 bps;

	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		weight = pn->val.weight ? pn->val.weight :
				blkcg->weight;
		blkio_update_group_weight(blkg, weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			bps = pn->val.bps ? pn->val.bps : (-1);
			blkio_update_group_bps(blkg, bps, pn->fileid);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			iops = pn->val.iops ? pn->val.iops : (-1);
			blkio_update_group_iops(blkg, iops, pn->fileid);
			break;
		}
		break;
	default:
		BUG();
	}
}
/*
 * A policy node rule has been updated. Propagate this update to all the
 * block groups which might be affected by this update.
 */
static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
				struct blkio_policy_node *pn)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (pn->dev != blkg->dev || pn->plid != blkg->plid)
			continue;
		blkio_update_blkg_policy(blkcg, blkg, pn);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
}
static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
			      const char *buffer)
{
	int ret = 0;
	char *buf;
	struct blkio_policy_node *newpn, *pn;
	struct blkio_cgroup *blkcg;
	int keep_newpn = 0;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	buf = kstrdup(buffer, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
	if (!newpn) {
		ret = -ENOMEM;
		goto free_buf;
	}

	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
	if (ret)
		goto free_newpn;

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	spin_lock_irq(&blkcg->lock);

	pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
	if (!pn) {
		if (!blkio_delete_rule_command(newpn)) {
			blkio_policy_insert_node(blkcg, newpn);
			keep_newpn = 1;
		}
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}

	if (blkio_delete_rule_command(newpn)) {
		blkio_policy_delete_node(pn);
		kfree(pn);
		spin_unlock_irq(&blkcg->lock);
		goto update_io_group;
	}
	spin_unlock_irq(&blkcg->lock);

	blkio_update_policy_rule(pn, newpn);

update_io_group:
	blkio_update_policy_node_blkg(blkcg, newpn);

free_newpn:
	if (!keep_newpn)
		kfree(newpn);
free_buf:
	kfree(buf);
	return ret;
}
static void
blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
{
	switch (pn->plid) {
	case BLKIO_POLICY_PROP:
		if (pn->fileid == BLKIO_PROP_weight_device)
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (pn->fileid) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
			seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.bps);
			break;
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
				MINOR(pn->dev), pn->val.iops);
		}
		break;
	default:
		BUG();
	}
}
/* cgroup files which read their data from policy nodes end up here */
static void blkio_read_policy_node_files(struct cftype *cft,
			struct blkio_cgroup *blkcg, struct seq_file *m)
{
	struct blkio_policy_node *pn;

	if (!list_empty(&blkcg->policy_list)) {
		spin_lock_irq(&blkcg->lock);
		list_for_each_entry(pn, &blkcg->policy_list, node) {
			if (!pn_matches_cftype(cft, pn))
				continue;
			blkio_print_policy_node(m, pn);
		}
		spin_unlock_irq(&blkcg->lock);
	}
}
static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
				struct seq_file *m)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			blkio_read_policy_node_files(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}
static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
		struct cftype *cft, struct cgroup_map_cb *cb,
		enum stat_type type, bool show_total, bool pcpu)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	uint64_t cgroup_total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		if (blkg->dev) {
			if (!cftype_blkg_same_policy(cft, blkg))
				continue;
			if (pcpu)
				cgroup_total += blkio_get_stat_cpu(blkg, cb,
						blkg->dev, type);
			else {
				spin_lock_irq(&blkg->stats_lock);
				cgroup_total += blkio_get_stat(blkg, cb,
						blkg->dev, type);
				spin_unlock_irq(&blkg->stats_lock);
			}
		}
	}
	if (show_total)
		cb->fill(cb, "Total", cgroup_total);
	rcu_read_unlock();
	return 0;
}
/* All map kind of cgroup file get serviced by this function */
static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
				struct cgroup_map_cb *cb)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_TIME, 0, 0);
		case BLKIO_PROP_sectors:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SECTORS, 0, 1);
		case BLKIO_PROP_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
		case BLKIO_PROP_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICED, 1, 1);
		case BLKIO_PROP_io_service_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_TIME, 1, 0);
		case BLKIO_PROP_io_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_WAIT_TIME, 1, 0);
		case BLKIO_PROP_io_merged:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_MERGED, 1, 1);
		case BLKIO_PROP_io_queued:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_QUEUED, 1, 0);
#ifdef CONFIG_DEBUG_BLK_CGROUP
		case BLKIO_PROP_unaccounted_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
		case BLKIO_PROP_dequeue:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_DEQUEUE, 0, 0);
		case BLKIO_PROP_avg_queue_size:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
		case BLKIO_PROP_group_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
		case BLKIO_PROP_idle_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_IDLE_TIME, 0, 0);
		case BLKIO_PROP_empty_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_EMPTY_TIME, 0, 0);
#endif
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch (name) {
		case BLKIO_THROTL_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
		case BLKIO_THROTL_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICED, 1, 1);
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}
static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	struct blkio_policy_node *pn;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		pn = blkio_policy_search_node(blkcg, blkg->dev,
				BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
		if (pn)
			continue;

		blkio_update_group_weight(blkg, blkcg->weight);
	}
	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
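/*
 * Per-device rules written through weight_device take precedence: groups that
 * already have such a rule are skipped here, and blkcg_get_weight() likewise
 * prefers the per-device value over the cgroup-wide weight.
 */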
static u64
blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return (u64)blkcg->weight;
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}
static int
blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch (plid) {
	case BLKIO_POLICY_PROP:
		switch (name) {
		case BLKIO_PROP_weight:
			return blkio_weight_write(blkcg, val);
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}
struct cftype blkio_files[] = {
	{
		.name = "weight_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight),
		.read_u64 = blkiocg_file_read_u64,
		.write_u64 = blkiocg_file_write_u64,
	},
	{
		.name = "time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "sectors",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_sectors),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_merged",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_merged),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_queued",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_queued),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_BLK_DEV_THROTTLING
	{
		.name = "throttle.read_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.read_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "throttle.io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
#endif /* CONFIG_BLK_DEV_THROTTLING */

#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_avg_queue_size),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "group_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_group_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "idle_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_idle_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "empty_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_empty_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "dequeue",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_dequeue),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "unaccounted_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_unaccounted_time),
		.read_map = blkiocg_file_read_map,
	},
#endif
};
static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}
static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	void *key;
	struct blkio_policy_type *blkiop;
	struct blkio_policy_node *pn, *pntmp;

	rcu_read_lock();
	do {
		spin_lock_irqsave(&blkcg->lock, flags);

		if (hlist_empty(&blkcg->blkg_list)) {
			spin_unlock_irqrestore(&blkcg->lock, flags);
			break;
		}

		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
					blkcg_node);
		key = rcu_dereference(blkg->key);
		__blkiocg_del_blkio_group(blkg);

		spin_unlock_irqrestore(&blkcg->lock, flags);

		/*
		 * This blkio_group is being unlinked as associated cgroup is
		 * going away. Let all the IO controlling policies know about
		 * this event.
		 */
		spin_lock(&blkio_list_lock);
		list_for_each_entry(blkiop, &blkio_list, list) {
			if (blkiop->plid != blkg->plid)
				continue;
			blkiop->ops.blkio_unlink_group_fn(key, blkg);
		}
		spin_unlock(&blkio_list_lock);
	} while (1);

	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
		blkio_policy_delete_node(pn);
		kfree(pn);
	}

	free_css_id(&blkio_subsys, &blkcg->css);
	rcu_read_unlock();
	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}
static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	INIT_LIST_HEAD(&blkcg->policy_list);
	return &blkcg->css;
}
/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}

	return ret;
}
static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;

	cgroup_taskset_for_each(task, cgrp, tset) {
		/* we don't lose anything even if ioc allocation fails */
		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
		if (ioc) {
			ioc_cgroup_changed(ioc);
			put_io_context(ioc);
		}
	}
}
void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_add_tail(&blkiop->list, &blkio_list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_del_init(&blkiop->list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
static int __init init_cgroup_blkio(void)
{
	return cgroup_load_subsys(&blkio_subsys);
}

static void __exit exit_cgroup_blkio(void)
{
	cgroup_unload_subsys(&blkio_subsys);
}

module_init(init_cgroup_blkio);
module_exit(exit_cgroup_blkio);
MODULE_LICENSE("GPL");