Remove #include of linux/jbd.h from mballoc-core so it will compile
[ext4-patch-queue.git] / jbd-stats-through-procfs
blob8136168dcaeebe962edf59abed68799e208c5eb3
1 jbd2: jbd2 stats through procfs
3 From: Johann Lombardi <johann.lombardi@bull.net>
5 The patch below updates the jbd stats patch to 2.6.20/jbd2.
6 The initial patch was posted by Alex Tomas in December 2005
7 (http://marc.info/?l=linux-ext4&m=113538565128617&w=2).
8 It provides statistics via procfs such as transaction lifetime and size.
10 ometimes, investigating performance problems, i find useful to have
11 stats from jbd about transaction's lifetime, size, etc. here is a
12 patch for review and inclusion probably.
14 for example, stats after creation of 3M files in htree directory:
16 [root@bob ~]# cat /proc/fs/jbd/sda/history
17 R/C  tid   wait  run   lock  flush log   hndls  block inlog ctime write drop  close
18 R    261   8260  2720  0     0     750   9892   8170  8187
19 C    259                                                    750   0     4885  1
20 R    262   20    2200  10    0     770   9836   8170  8187
21 R    263   30    2200  10    0     3070  9812   8170  8187
22 R    264   0     5000  10    0     1340  0      0     0
23 C    261                                                    8240  3212  4957  0
24 R    265   8260  1470  0     0     4640  9854   8170  8187
25 R    266   0     5000  10    0     1460  0      0     0
26 C    262                                                    8210  2989  4868  0
27 R    267   8230  1490  10    0     4440  9875   8171  8188
28 R    268   0     5000  10    0     1260  0      0     0
29 C    263                                                    7710  2937  4908  0
30 R    269   7730  1470  10    0     3330  9841   8170  8187
31 R    270   0     5000  10    0     830   0      0     0
32 C    265                                                    8140  3234  4898  0
33 C    267                                                    720   0     4849  1
34 R    271   8630  2740  20    0     740   9819   8170  8187
35 C    269                                                    800   0     4214  1
36 R    272   40    2170  10    0     830   9716   8170  8187
37 R    273   40    2280  0     0     3530  9799   8170  8187
38 R    274   0     5000  10    0     990   0      0     0
41 where,
43 R     - line for transaction's life from T_RUNNING to T_FINISHED
44 C     - line for transaction's checkpointing
45 tid   - transaction's id
46 wait  - for how long we were waiting for new transaction to start
47          (the longest period journal_start() took in this transaction)
48 run   - real transaction's lifetime (from T_RUNNING to T_LOCKED
49 lock  - how long we were waiting for all handles to close
50          (time the transaction was in T_LOCKED)
51 flush - how long it took to flush all data (data=ordered)
52 log   - how long it took to write the transaction to the log
53 hndls - how many handles got to the transaction
54 block - how many blocks got to the transaction
55 inlog - how many blocks are written to the log (block + descriptors)
56 ctime - how long it took to checkpoint the transaction
57 write - how many blocks have been written during checkpointing
58 drop  - how many blocks have been dropped during checkpointing
59 close - how many running transactions have been closed to checkpoint this one
61 all times are in msec.
64 [root@bob ~]# cat /proc/fs/jbd/sda/info
65 280 transaction, each upto 8192 blocks
66 average:
67   1633ms waiting for transaction
68   3616ms running transaction
69   5ms transaction was being locked
70   1ms flushing data (in ordered mode)
71   1799ms logging transaction
72   11781 handles per transaction
73   5629 blocks per transaction
74   5641 logged blocks per transaction
78 Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
80 ---
82  fs/jbd2/checkpoint.c  |   12 +
83  fs/jbd2/commit.c      |   47 +++++++
84  fs/jbd2/journal.c     |  332 ++++++++++++++++++++++++++++++++++++++++++++++++++
85  fs/jbd2/transaction.c |    9 +
86  include/linux/jbd2.h  |   92 +++++++++++++
87  5 files changed, 490 insertions(+), 2 deletions(-)
90 Index: linux-2.6.23-rc6/fs/jbd2/checkpoint.c
91 ===================================================================
92 --- linux-2.6.23-rc6.orig/fs/jbd2/checkpoint.c  2007-09-14 10:01:50.000000000 -0700
93 +++ linux-2.6.23-rc6/fs/jbd2/checkpoint.c       2007-09-14 10:06:10.000000000 -0700
94 @@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct
95   * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
96   */
97  static int __process_buffer(journal_t *journal, struct journal_head *jh,
98 -                       struct buffer_head **bhs, int *batch_count)
99 +                       struct buffer_head **bhs, int *batch_count,
100 +                       transaction_t *transaction)
102         struct buffer_head *bh = jh2bh(jh);
103         int ret = 0;
104 @@ -250,6 +251,7 @@ static int __process_buffer(journal_t *j
105                 transaction_t *t = jh->b_transaction;
106                 tid_t tid = t->t_tid;
108 +               transaction->t_chp_stats.cs_forced_to_close++;
109                 spin_unlock(&journal->j_list_lock);
110                 jbd_unlock_bh_state(bh);
111                 jbd2_log_start_commit(journal, tid);
112 @@ -279,6 +281,7 @@ static int __process_buffer(journal_t *j
113                 bhs[*batch_count] = bh;
114                 __buffer_relink_io(jh);
115                 jbd_unlock_bh_state(bh);
116 +               transaction->t_chp_stats.cs_written++;
117                 (*batch_count)++;
118                 if (*batch_count == NR_BATCH) {
119                         spin_unlock(&journal->j_list_lock);
120 @@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *jo
121         if (!journal->j_checkpoint_transactions)
122                 goto out;
123         transaction = journal->j_checkpoint_transactions;
124 +       if (transaction->t_chp_stats.cs_chp_time == 0)
125 +               transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
126         this_tid = transaction->t_tid;
127  restart:
128         /*
129 @@ -346,7 +351,8 @@ restart:
130                                 retry = 1;
131                                 break;
132                         }
133 -                       retry = __process_buffer(journal, jh, bhs,&batch_count);
134 +                       retry = __process_buffer(journal, jh, bhs, &batch_count,
135 +                                                transaction);
136                         if (!retry && lock_need_resched(&journal->j_list_lock)){
137                                 spin_unlock(&journal->j_list_lock);
138                                 retry = 1;
139 @@ -668,6 +674,8 @@ void __jbd2_journal_insert_checkpoint(st
141  void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
143 +       struct transaction_stats_s stats;
145         assert_spin_locked(&journal->j_list_lock);
146         if (transaction->t_cpnext) {
147                 transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
148 Index: linux-2.6.23-rc6/fs/jbd2/commit.c
149 ===================================================================
150 --- linux-2.6.23-rc6.orig/fs/jbd2/commit.c      2007-09-14 10:03:23.000000000 -0700
151 +++ linux-2.6.23-rc6/fs/jbd2/commit.c   2007-09-14 10:06:10.000000000 -0700
152 @@ -21,6 +21,7 @@
153  #include <linux/mm.h>
154  #include <linux/pagemap.h>
155  #include <linux/crc32.h>
156 +#include <linux/jiffies.h>
158  /*
159   * Default IO end handler for temporary BJ_IO buffer_heads.
160 @@ -365,6 +366,7 @@ static inline void write_tag_block(int t
161   */
162  void jbd2_journal_commit_transaction(journal_t *journal)
164 +       struct transaction_stats_s stats;
165         transaction_t *commit_transaction;
166         struct journal_head *jh, *new_jh, *descriptor;
167         struct buffer_head **wbuf = journal->j_wbuf;
168 @@ -414,6 +416,11 @@ void jbd2_journal_commit_transaction(jou
169         spin_lock(&journal->j_state_lock);
170         commit_transaction->t_state = T_LOCKED;
172 +       stats.ts_wait = commit_transaction->t_max_wait;
173 +       stats.ts_locked = CURRENT_MSECS;
174 +       stats.ts_running = jbd2_time_diff(commit_transaction->t_start,
175 +                                               stats.ts_locked);
177         spin_lock(&commit_transaction->t_handle_lock);
178         while (commit_transaction->t_updates) {
179                 DEFINE_WAIT(wait);
180 @@ -484,6 +491,9 @@ void jbd2_journal_commit_transaction(jou
181          */
182         jbd2_journal_switch_revoke_table(journal);
184 +       stats.ts_flushing = CURRENT_MSECS;
185 +       stats.ts_locked = jbd2_time_diff(stats.ts_locked, stats.ts_flushing);
187         commit_transaction->t_state = T_FLUSH;
188         journal->j_committing_transaction = commit_transaction;
189         journal->j_running_transaction = NULL;
190 @@ -552,6 +562,11 @@ void jbd2_journal_commit_transaction(jou
191          */
192         commit_transaction->t_state = T_COMMIT;
194 +       stats.ts_logging = CURRENT_MSECS;
195 +       stats.ts_flushing = jbd2_time_diff(stats.ts_flushing, stats.ts_logging);
196 +       stats.ts_blocks = commit_transaction->t_outstanding_credits;
197 +       stats.ts_blocks_logged = 0;
199         descriptor = NULL;
200         bufs = 0;
201         while (commit_transaction->t_buffers) {
202 @@ -710,6 +725,7 @@ start_journal_io:
203                                 submit_bh(WRITE, bh);
204                         }
205                         cond_resched();
206 +                       stats.ts_blocks_logged += bufs;
208                         /* Force a new descriptor to be generated next
209                             time round the loop. */
210 @@ -903,6 +919,7 @@ restart_loop:
211                 cp_transaction = jh->b_cp_transaction;
212                 if (cp_transaction) {
213                         JBUFFER_TRACE(jh, "remove from old cp transaction");
214 +                       cp_transaction->t_chp_stats.cs_dropped++;
215                         __jbd2_journal_remove_checkpoint(jh);
216                 }
218 @@ -977,6 +994,36 @@ restart_loop:
220         J_ASSERT(commit_transaction->t_state == T_COMMIT);
222 +       commit_transaction->t_start = CURRENT_MSECS;
223 +       stats.ts_logging = jbd2_time_diff(stats.ts_logging,
224 +                                               commit_transaction->t_start);
226 +       /*
227 +        * File the transaction for history
228 +        */
229 +       stats.ts_type = JBD2_STATS_RUN;
230 +       stats.ts_tid = commit_transaction->t_tid;
231 +       stats.ts_handle_count = commit_transaction->t_handle_count;
232 +       spin_lock(&journal->j_history_lock);
233 +       memcpy(journal->j_history + journal->j_history_cur, &stats,
234 +                       sizeof(stats));
235 +       if (++journal->j_history_cur == journal->j_history_max)
236 +               journal->j_history_cur = 0;
238 +       /*
239 +        * Calculate overall stats
240 +        */
241 +       journal->j_stats.ts_tid++;
242 +       journal->j_stats.ts_wait += stats.ts_wait;
243 +       journal->j_stats.ts_running += stats.ts_running;
244 +       journal->j_stats.ts_locked += stats.ts_locked;
245 +       journal->j_stats.ts_flushing += stats.ts_flushing;
246 +       journal->j_stats.ts_logging += stats.ts_logging;
247 +       journal->j_stats.ts_handle_count += stats.ts_handle_count;
248 +       journal->j_stats.ts_blocks += stats.ts_blocks;
249 +       journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
250 +       spin_unlock(&journal->j_history_lock);
252         commit_transaction->t_state = T_FINISHED;
253         J_ASSERT(commit_transaction == journal->j_committing_transaction);
254         journal->j_commit_sequence = commit_transaction->t_tid;
255 Index: linux-2.6.23-rc6/fs/jbd2/journal.c
256 ===================================================================
257 --- linux-2.6.23-rc6.orig/fs/jbd2/journal.c     2007-09-14 10:06:01.000000000 -0700
258 +++ linux-2.6.23-rc6/fs/jbd2/journal.c  2007-09-14 10:06:10.000000000 -0700
259 @@ -36,6 +36,7 @@
260  #include <linux/poison.h>
261  #include <linux/proc_fs.h>
262  #include <linux/debugfs.h>
263 +#include <linux/seq_file.h>
265  #include <asm/uaccess.h>
266  #include <asm/page.h>
267 @@ -641,6 +642,306 @@ struct journal_head *jbd2_journal_get_de
268         return jbd2_journal_add_journal_head(bh);
271 +struct jbd2_stats_proc_session {
272 +       journal_t *journal;
273 +       struct transaction_stats_s *stats;
274 +       int start;
275 +       int max;
278 +static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s,
279 +                                       struct transaction_stats_s *ts,
280 +                                       int first)
282 +       if (ts == s->stats + s->max)
283 +               ts = s->stats;
284 +       if (!first && ts == s->stats + s->start)
285 +               return NULL;
286 +       while (ts->ts_type == 0) {
287 +               ts++;
288 +               if (ts == s->stats + s->max)
289 +                       ts = s->stats;
290 +               if (ts == s->stats + s->start)
291 +                       return NULL;
292 +       }
293 +       return ts;
297 +static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos)
299 +       struct jbd2_stats_proc_session *s = seq->private;
300 +       struct transaction_stats_s *ts;
301 +       int l = *pos;
303 +       if (l == 0)
304 +               return SEQ_START_TOKEN;
305 +       ts = jbd2_history_skip_empty(s, s->stats + s->start, 1);
306 +       if (!ts)
307 +               return NULL;
308 +       l--;
309 +       while (l) {
310 +               ts = jbd2_history_skip_empty(s, ++ts, 0);
311 +               if (!ts)
312 +                       break;
313 +               l--;
314 +       }
315 +       return ts;
318 +static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
320 +       struct jbd2_stats_proc_session *s = seq->private;
321 +       struct transaction_stats_s *ts = v;
323 +       ++*pos;
324 +       if (v == SEQ_START_TOKEN)
325 +               return jbd2_history_skip_empty(s, s->stats + s->start, 1);
326 +       else
327 +               return jbd2_history_skip_empty(s, ++ts, 0);
330 +static int jbd2_seq_history_show(struct seq_file *seq, void *v)
332 +       struct transaction_stats_s *ts = v;
333 +       if (v == SEQ_START_TOKEN) {
334 +               seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
335 +                               "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
336 +                               "wait", "run", "lock", "flush", "log", "hndls",
337 +                               "block", "inlog", "ctime", "write", "drop",
338 +                               "close");
339 +               return 0;
340 +       }
341 +       if (ts->ts_type == JBD2_STATS_RUN)
342 +               seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
343 +                               "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
344 +                               ts->ts_wait, ts->ts_running, ts->ts_locked,
345 +                               ts->ts_flushing, ts->ts_logging,
346 +                               ts->ts_handle_count, ts->ts_blocks,
347 +                               ts->ts_blocks_logged);
348 +       else if (ts->ts_type == JBD2_STATS_CHECKPOINT)
349 +               seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
350 +                               "C", ts->ts_tid, " ", ts->ts_chp_time,
351 +                               ts->ts_written, ts->ts_dropped,
352 +                               ts->ts_forced_to_close);
353 +       else
354 +               J_ASSERT(0);
355 +       return 0;
358 +static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
362 +static struct seq_operations jbd2_seq_history_ops = {
363 +       .start  = jbd2_seq_history_start,
364 +       .next   = jbd2_seq_history_next,
365 +       .stop   = jbd2_seq_history_stop,
366 +       .show   = jbd2_seq_history_show,
369 +static int jbd2_seq_history_open(struct inode *inode, struct file *file)
371 +       journal_t *journal = PDE(inode)->data;
372 +       struct jbd2_stats_proc_session *s;
373 +       int rc, size;
375 +       s = kmalloc(sizeof(*s), GFP_KERNEL);
376 +       if (s == NULL)
377 +               return -EIO;
378 +       size = sizeof(struct transaction_stats_s) * journal->j_history_max;
379 +       s->stats = kmalloc(size, GFP_KERNEL);
380 +       if (s == NULL) {
381 +               kfree(s);
382 +               return -EIO;
383 +       }
384 +       spin_lock(&journal->j_history_lock);
385 +       memcpy(s->stats, journal->j_history, size);
386 +       s->max = journal->j_history_max;
387 +       s->start = journal->j_history_cur % s->max;
388 +       spin_unlock(&journal->j_history_lock);
390 +       rc = seq_open(file, &jbd2_seq_history_ops);
391 +       if (rc == 0) {
392 +               struct seq_file *m = (struct seq_file *)file->private_data;
393 +               m->private = s;
394 +       } else {
395 +               kfree(s->stats);
396 +               kfree(s);
397 +       }
398 +       return rc;
402 +static int jbd2_seq_history_release(struct inode *inode, struct file *file)
404 +       struct seq_file *seq = (struct seq_file *)file->private_data;
405 +       struct jbd2_stats_proc_session *s = seq->private;
406 +       kfree(s->stats);
407 +       kfree(s);
408 +       return seq_release(inode, file);
411 +static struct file_operations jbd2_seq_history_fops = {
412 +       .owner          = THIS_MODULE,
413 +       .open           = jbd2_seq_history_open,
414 +       .read           = seq_read,
415 +       .llseek         = seq_lseek,
416 +       .release        = jbd2_seq_history_release,
419 +static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
421 +       return *pos ? NULL : SEQ_START_TOKEN;
424 +static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
426 +       return NULL;
429 +static int jbd2_seq_info_show(struct seq_file *seq, void *v)
431 +       struct jbd2_stats_proc_session *s = seq->private;
432 +       if (v != SEQ_START_TOKEN)
433 +               return 0;
434 +       seq_printf(seq, "%lu transaction, each upto %u blocks\n",
435 +                       s->stats->ts_tid,
436 +                       s->journal->j_max_transaction_buffers);
437 +       if (s->stats->ts_tid == 0)
438 +               return 0;
439 +       seq_printf(seq, "average: \n  %lums waiting for transaction\n",
440 +                       s->stats->ts_wait / s->stats->ts_tid);
441 +       seq_printf(seq, "  %lums running transaction\n",
442 +                       s->stats->ts_running / s->stats->ts_tid);
443 +       seq_printf(seq, "  %lums transaction was being locked\n",
444 +                       s->stats->ts_locked / s->stats->ts_tid);
445 +       seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
446 +                       s->stats->ts_flushing / s->stats->ts_tid);
447 +       seq_printf(seq, "  %lums logging transaction\n",
448 +                       s->stats->ts_logging / s->stats->ts_tid);
449 +       seq_printf(seq, "  %lu handles per transaction\n",
450 +                       s->stats->ts_handle_count / s->stats->ts_tid);
451 +       seq_printf(seq, "  %lu blocks per transaction\n",
452 +                       s->stats->ts_blocks / s->stats->ts_tid);
453 +       seq_printf(seq, "  %lu logged blocks per transaction\n",
454 +                       s->stats->ts_blocks_logged / s->stats->ts_tid);
455 +       return 0;
458 +static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
462 +static struct seq_operations jbd2_seq_info_ops = {
463 +       .start  = jbd2_seq_info_start,
464 +       .next   = jbd2_seq_info_next,
465 +       .stop   = jbd2_seq_info_stop,
466 +       .show   = jbd2_seq_info_show,
469 +static int jbd2_seq_info_open(struct inode *inode, struct file *file)
471 +       journal_t *journal = PDE(inode)->data;
472 +       struct jbd2_stats_proc_session *s;
473 +       int rc, size;
475 +       s = kmalloc(sizeof(*s), GFP_KERNEL);
476 +       if (s == NULL)
477 +               return -EIO;
478 +       size = sizeof(struct transaction_stats_s);
479 +       s->stats = kmalloc(size, GFP_KERNEL);
480 +       if (s == NULL) {
481 +               kfree(s);
482 +               return -EIO;
483 +       }
484 +       spin_lock(&journal->j_history_lock);
485 +       memcpy(s->stats, &journal->j_stats, size);
486 +       s->journal = journal;
487 +       spin_unlock(&journal->j_history_lock);
489 +       rc = seq_open(file, &jbd2_seq_info_ops);
490 +       if (rc == 0) {
491 +               struct seq_file *m = (struct seq_file *)file->private_data;
492 +               m->private = s;
493 +       } else {
494 +               kfree(s->stats);
495 +               kfree(s);
496 +       }
497 +       return rc;
501 +static int jbd2_seq_info_release(struct inode *inode, struct file *file)
503 +       struct seq_file *seq = (struct seq_file *)file->private_data;
504 +       struct jbd2_stats_proc_session *s = seq->private;
505 +       kfree(s->stats);
506 +       kfree(s);
507 +       return seq_release(inode, file);
510 +static struct file_operations jbd2_seq_info_fops = {
511 +       .owner          = THIS_MODULE,
512 +       .open           = jbd2_seq_info_open,
513 +       .read           = seq_read,
514 +       .llseek         = seq_lseek,
515 +       .release        = jbd2_seq_info_release,
518 +static struct proc_dir_entry *proc_jbd2_stats;
520 +static void jbd2_stats_proc_init(journal_t *journal)
522 +       char name[64];
524 +       snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
525 +       journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
526 +       if (journal->j_proc_entry) {
527 +               struct proc_dir_entry *p;
528 +               p = create_proc_entry("history", S_IRUGO,
529 +                               journal->j_proc_entry);
530 +               if (p) {
531 +                       p->proc_fops = &jbd2_seq_history_fops;
532 +                       p->data = journal;
533 +                       p = create_proc_entry("info", S_IRUGO,
534 +                                               journal->j_proc_entry);
535 +                       if (p) {
536 +                               p->proc_fops = &jbd2_seq_info_fops;
537 +                               p->data = journal;
538 +                       }
539 +               }
540 +       }
543 +static void jbd2_stats_proc_exit(journal_t *journal)
545 +       char name[64];
547 +       snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
548 +       remove_proc_entry("info", journal->j_proc_entry);
549 +       remove_proc_entry("history", journal->j_proc_entry);
550 +       remove_proc_entry(name, proc_jbd2_stats);
553 +static void journal_init_stats(journal_t *journal)
555 +       int size;
557 +       if (proc_jbd2_stats == NULL)
558 +               return;
560 +       journal->j_history_max = 100;
561 +       size = sizeof(struct transaction_stats_s) * journal->j_history_max;
562 +       journal->j_history = kmalloc(size, GFP_KERNEL);
563 +       if (journal->j_history == NULL) {
564 +               journal->j_history_max = 0;
565 +               return;
566 +       }
567 +       memset(journal->j_history, 0, size);
568 +       spin_lock_init(&journal->j_history_lock);
571  /*
572   * Management for journal control blocks: functions to create and
573   * destroy journal_t structures, and to initialise and read existing
574 @@ -683,6 +984,9 @@ static journal_t * journal_init_common (
575                 jbd2_kfree(journal);
576                 goto fail;
577         }
579 +       journal_init_stats(journal);
581         return journal;
582  fail:
583         return NULL;
584 @@ -733,6 +1037,7 @@ journal_t * jbd2_journal_init_dev(struct
585                 journal = NULL;
586                 goto out;
587         }
588 +       jbd2_stats_proc_init(journal);
589         journal->j_dev = bdev;
590         journal->j_fs_dev = fs_dev;
591         journal->j_blk_offset = start;
592 @@ -775,6 +1080,7 @@ journal_t * jbd2_journal_init_inode (str
594         journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
595         journal->j_blocksize = inode->i_sb->s_blocksize;
596 +       jbd2_stats_proc_init(journal);
598         /* journal descriptor can store up to n blocks -bzzz */
599         n = journal->j_blocksize / sizeof(journal_block_tag_t);
600 @@ -1155,6 +1461,8 @@ void jbd2_journal_destroy(journal_t *jou
601                 brelse(journal->j_sb_buffer);
602         }
604 +       if (journal->j_proc_entry)
605 +               jbd2_stats_proc_exit(journal);
606         if (journal->j_inode)
607                 iput(journal->j_inode);
608         if (journal->j_revoke)
609 @@ -1942,6 +2250,28 @@ static void __exit jbd2_remove_debugfs_e
611  #endif
613 +#if defined(CONFIG_PROC_FS)
615 +#define JBD2_STATS_PROC_NAME "fs/jbd2"
617 +static void __init jbd2_create_jbd_stats_proc_entry(void)
619 +       proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
622 +static void __exit jbd2_remove_jbd_stats_proc_entry(void)
624 +       if (proc_jbd2_stats)
625 +               remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
628 +#else
630 +#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
631 +#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)
633 +#endif
635  struct kmem_cache *jbd2_handle_cache;
637  static int __init journal_init_handle_cache(void)
638 @@ -1997,6 +2327,7 @@ static int __init journal_init(void)
639         if (ret != 0)
640                 jbd2_journal_destroy_caches();
641         jbd2_create_debugfs_entry();
642 +       jbd2_create_jbd_stats_proc_entry();
643         return ret;
646 @@ -2008,6 +2339,7 @@ static void __exit journal_exit(void)
647                 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
648  #endif
649         jbd2_remove_debugfs_entry();
650 +       jbd2_remove_jbd_stats_proc_entry();
651         jbd2_journal_destroy_caches();
654 Index: linux-2.6.23-rc6/fs/jbd2/transaction.c
655 ===================================================================
656 --- linux-2.6.23-rc6.orig/fs/jbd2/transaction.c 2007-09-14 10:01:50.000000000 -0700
657 +++ linux-2.6.23-rc6/fs/jbd2/transaction.c      2007-09-14 10:06:10.000000000 -0700
658 @@ -59,6 +59,8 @@ jbd2_get_transaction(journal_t *journal,
660         J_ASSERT(journal->j_running_transaction == NULL);
661         journal->j_running_transaction = transaction;
662 +       transaction->t_max_wait = 0;
663 +       transaction->t_start = CURRENT_MSECS;
665         return transaction;
667 @@ -85,6 +87,7 @@ static int start_this_handle(journal_t *
668         int nblocks = handle->h_buffer_credits;
669         transaction_t *new_transaction = NULL;
670         int ret = 0;
671 +       unsigned long ts = CURRENT_MSECS;
673         if (nblocks > journal->j_max_transaction_buffers) {
674                 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
675 @@ -218,6 +221,12 @@ repeat_locked:
676         /* OK, account for the buffers that this operation expects to
677          * use and add the handle to the running transaction. */
679 +       if (time_after(transaction->t_start, ts)) {
680 +               ts = jbd2_time_diff(ts, transaction->t_start);
681 +               if (ts > transaction->t_max_wait)
682 +                       transaction->t_max_wait = ts;
683 +       }
685         handle->h_transaction = transaction;
686         transaction->t_outstanding_credits += nblocks;
687         transaction->t_updates++;
688 Index: linux-2.6.23-rc6/include/linux/jbd2.h
689 ===================================================================
690 --- linux-2.6.23-rc6.orig/include/linux/jbd2.h  2007-09-14 10:03:08.000000000 -0700
691 +++ linux-2.6.23-rc6/include/linux/jbd2.h       2007-09-14 10:06:10.000000000 -0700
692 @@ -451,6 +451,16 @@ struct handle_s
693  };
697 + * Some stats for checkpoint phase
698 + */
699 +struct transaction_chp_stats_s {
700 +       unsigned long           cs_chp_time;
701 +       unsigned long           cs_forced_to_close;
702 +       unsigned long           cs_written;
703 +       unsigned long           cs_dropped;
706  /* The transaction_t type is the guts of the journaling mechanism.  It
707   * tracks a compound transaction through its various states:
708   *
709 @@ -586,6 +596,21 @@ struct transaction_s
710         spinlock_t              t_handle_lock;
712         /*
713 +        * Longest time some handle had to wait for running transaction
714 +        */
715 +       unsigned long           t_max_wait;
717 +       /*
718 +        * When transaction started
719 +        */
720 +       unsigned long           t_start;
722 +       /*
723 +        * Checkpointing stats [j_checkpoint_sem]
724 +        */
725 +       struct transaction_chp_stats_s t_chp_stats;
727 +       /*
728          * Number of outstanding updates running on this transaction
729          * [t_handle_lock]
730          */
731 @@ -616,6 +641,57 @@ struct transaction_s
733  };
735 +struct transaction_run_stats_s {
736 +       unsigned long           rs_wait;
737 +       unsigned long           rs_running;
738 +       unsigned long           rs_locked;
739 +       unsigned long           rs_flushing;
740 +       unsigned long           rs_logging;
742 +       unsigned long           rs_handle_count;
743 +       unsigned long           rs_blocks;
744 +       unsigned long           rs_blocks_logged;
747 +struct transaction_stats_s
749 +       int                     ts_type;
750 +       unsigned long           ts_tid;
751 +       union {
752 +               struct transaction_run_stats_s run;
753 +               struct transaction_chp_stats_s chp;
754 +       } u;
757 +#define JBD2_STATS_RUN         1
758 +#define JBD2_STATS_CHECKPOINT  2
760 +#define ts_wait                        u.run.rs_wait
761 +#define ts_running             u.run.rs_running
762 +#define ts_locked              u.run.rs_locked
763 +#define ts_flushing            u.run.rs_flushing
764 +#define ts_logging             u.run.rs_logging
765 +#define ts_handle_count                u.run.rs_handle_count
766 +#define ts_blocks              u.run.rs_blocks
767 +#define ts_blocks_logged       u.run.rs_blocks_logged
769 +#define ts_chp_time            u.chp.cs_chp_time
770 +#define ts_forced_to_close     u.chp.cs_forced_to_close
771 +#define ts_written             u.chp.cs_written
772 +#define ts_dropped             u.chp.cs_dropped
774 +#define CURRENT_MSECS          (jiffies_to_msecs(jiffies))
776 +static inline unsigned int
777 +jbd2_time_diff(unsigned int start, unsigned int end)
779 +       if (unlikely(start > end))
780 +               end = end + (~0UL - start);
781 +       else
782 +               end -= start;
783 +       return end;
786  /**
787   * struct journal_s - The journal_s type is the concrete type associated with
788   *     journal_t.
789 @@ -677,6 +753,12 @@ struct transaction_s
790   * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
791   *     number that will fit in j_blocksize
792   * @j_last_sync_writer: most recent pid which did a synchronous write
793 + * @j_history: Buffer storing the transactions statistics history
794 + * @j_history_max: Maximum number of transactions in the statistics history
795 + * @j_history_cur: Current number of transactions in the statistics history
796 + * @j_history_lock: Protect the transactions statistics history
797 + * @j_proc_entry: procfs entry for the jbd statistics directory
798 + * @j_stats: Overall statistics
799   * @j_private: An opaque pointer to fs-private information.
800   */
802 @@ -869,6 +951,16 @@ struct journal_s
803         pid_t                   j_last_sync_writer;
805         /*
806 +        * Journal statistics
807 +        */
808 +       struct transaction_stats_s *j_history;
809 +       int                     j_history_max;
810 +       int                     j_history_cur;
811 +       spinlock_t              j_history_lock;
812 +       struct proc_dir_entry   *j_proc_entry;
813 +       struct transaction_stats_s j_stats;
815 +       /*
816          * An opaque pointer to fs-private information.  ext3 puts its
817          * superblock pointer here
818          */