add patch use-percpu_counter-for-extent_cache-hits-misses
[ext4-patch-queue.git] / use-percpu_counter-for-extent_cache-hits-misses
blob87d6100231edc29c44cfbccb6201ae3012458b58
1 ext4: use percpu_counters for extent_status cache hits/misses
3 From: Yang Guo <guoyang2@huawei.com>
5 @es_stats_cache_hits and @es_stats_cache_misses are accessed frequently in
6 the ext4_es_lookup_extent function, which hurts ext4 read/write
7 performance on NUMA systems.
8 Let's optimize this by converting them to percpu_counters, which improves
9 performance.
11 The test command is as below:
12 fio -name=randwrite -numjobs=8 -filename=/mnt/test1 -rw=randwrite
13 -ioengine=libaio -direct=1 -iodepth=64 -sync=0 -norandommap -group_reporting
14 -runtime=120 -time_based -bs=4k -size=5G
16 And the result is about 10% better than the initial implementation:
17 without the patch, IOPS=197k, BW=770MiB/s (808MB/s)(90.3GiB/120002msec)
18 with the patch,  IOPS=218k, BW=852MiB/s (894MB/s)(99.9GiB/120002msec)
20 Cc: Andreas Dilger <adilger.kernel@dilger.ca>
21 Signed-off-by: Yang Guo <guoyang2@huawei.com>
22 Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
23 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
24 ---
25  fs/ext4/extents_status.c | 20 +++++++++++++-------
26  fs/ext4/extents_status.h |  4 ++--
27  2 files changed, 15 insertions(+), 9 deletions(-)
29 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
30 index e977c560992c..dc28a9642452 100644
31 --- a/fs/ext4/extents_status.c
32 +++ b/fs/ext4/extents_status.c
33 @@ -948,7 +948,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
34                 es->es_pblk = es1->es_pblk;
35                 if (!ext4_es_is_referenced(es1))
36                         ext4_es_set_referenced(es1);
37 -               stats->es_stats_cache_hits++;
38 +               percpu_counter_inc(&stats->es_stats_cache_hits);
39                 if (next_lblk) {
40                         node = rb_next(&es1->rb_node);
41                         if (node) {
42 @@ -959,7 +959,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
43                                 *next_lblk = 0;
44                 }
45         } else {
46 -               stats->es_stats_cache_misses++;
47 +               percpu_counter_inc(&stats->es_stats_cache_misses);
48         }
50         read_unlock(&EXT4_I(inode)->i_es_lock);
51 @@ -1586,9 +1586,9 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
52         seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
53                    percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
54                    percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
55 -       seq_printf(seq, "  %lu/%lu cache hits/misses\n",
56 -                  es_stats->es_stats_cache_hits,
57 -                  es_stats->es_stats_cache_misses);
58 +       seq_printf(seq, "  %llu/%llu cache hits/misses\n",
59 +                  percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
60 +                  percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
61         if (inode_cnt)
62                 seq_printf(seq, "  %d inodes on list\n", inode_cnt);
64 @@ -1615,8 +1615,14 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
65         sbi->s_es_nr_inode = 0;
66         spin_lock_init(&sbi->s_es_lock);
67         sbi->s_es_stats.es_stats_shrunk = 0;
68 -       sbi->s_es_stats.es_stats_cache_hits = 0;
69 -       sbi->s_es_stats.es_stats_cache_misses = 0;
70 +       err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
71 +                                 GFP_KERNEL);
72 +       if (err)
73 +               return err;
74 +       err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
75 +                                 GFP_KERNEL);
76 +       if (err)
77 +               return err;
78         sbi->s_es_stats.es_stats_scan_time = 0;
79         sbi->s_es_stats.es_stats_max_scan_time = 0;
80         err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
81 diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
82 index 5e5c4a40d863..825313c59752 100644
83 --- a/fs/ext4/extents_status.h
84 +++ b/fs/ext4/extents_status.h
85 @@ -70,8 +70,8 @@ struct ext4_es_tree {
87  struct ext4_es_stats {
88         unsigned long es_stats_shrunk;
89 -       unsigned long es_stats_cache_hits;
90 -       unsigned long es_stats_cache_misses;
91 +       struct percpu_counter es_stats_cache_hits;
92 +       struct percpu_counter es_stats_cache_misses;
93         u64 es_stats_scan_time;
94         u64 es_stats_max_scan_time;
95         struct percpu_counter es_stats_all_cnt;