/*
 * Functions to sequence FLUSH and FUA writes.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"
/* FLUSH/FUA sequences */
enum {
	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
	QUEUE_FSEQ_DONE		= (1 << 4),
};
static struct request *queue_next_fseq(struct request_queue *q);
unsigned blk_flush_cur_seq(struct request_queue *q)
{
	if (!q->flush_seq)
		return 0;
	return 1 << ffz(q->flush_seq);
}
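/*
 * Illustrative sketch (not part of the original code): how flush_seq
 * advances for a request that needs all three steps.  blk_flush_cur_seq()
 * returns the lowest clear bit, i.e. the next step to issue; once the
 * STARTED/PREFLUSH/DATA/POSTFLUSH bits are all set it evaluates to
 * QUEUE_FSEQ_DONE and the sequence is finished.
 *
 *	seq  = QUEUE_FSEQ_STARTED;	// cur_seq == QUEUE_FSEQ_PREFLUSH
 *	seq |= QUEUE_FSEQ_PREFLUSH;	// cur_seq == QUEUE_FSEQ_DATA
 *	seq |= QUEUE_FSEQ_DATA;		// cur_seq == QUEUE_FSEQ_POSTFLUSH
 *	seq |= QUEUE_FSEQ_POSTFLUSH;	// cur_seq == QUEUE_FSEQ_DONE
 */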
static struct request *blk_flush_complete_seq(struct request_queue *q,
					      unsigned seq, int error)
{
	struct request *next_rq = NULL;

	if (error && !q->flush_err)
		q->flush_err = error;

	BUG_ON(q->flush_seq & seq);
	q->flush_seq |= seq;

	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
		/* not complete yet, queue the next flush sequence */
		next_rq = queue_next_fseq(q);
	} else {
		/* complete this flush request */
		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
		q->orig_flush_rq = NULL;
		q->flush_seq = 0;

		/* dispatch the next flush if there's one */
		if (!list_empty(&q->pending_flushes)) {
			next_rq = list_entry_rq(q->pending_flushes.next);
			list_move(&next_rq->queuelist, &q->queue_head);
		}
	}
	return next_rq;
}
static void blk_flush_complete_seq_end_io(struct request_queue *q,
					  unsigned seq, int error)
{
	bool was_empty = elv_queue_empty(q);
	struct request *next_rq;

	next_rq = blk_flush_complete_seq(q, seq, error);

	/*
	 * Moving a request silently to empty queue_head may stall the
	 * queue.  Kick the queue in those cases.  This function is called
	 * from request completion path and calling directly into
	 * request_fn may confuse the driver.  Always use kblockd.
	 */
	if (was_empty && next_rq)
		__blk_run_queue(q, true);
}
static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}
static void flush_data_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
}
static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}
static void init_flush_request(struct request *rq, struct gendisk *disk)
{
	rq->cmd_type = REQ_TYPE_FS;
	rq->cmd_flags = WRITE_FLUSH;
	rq->rq_disk = disk;
}
static struct request *queue_next_fseq(struct request_queue *q)
{
	struct request *orig_rq = q->orig_flush_rq;
	struct request *rq = &q->flush_rq;

	blk_rq_init(q, rq);

	switch (blk_flush_cur_seq(q)) {
	case QUEUE_FSEQ_PREFLUSH:
		init_flush_request(rq, orig_rq->rq_disk);
		rq->end_io = pre_flush_end_io;
		break;
	case QUEUE_FSEQ_DATA:
		init_request_from_bio(rq, orig_rq->bio);
		/*
		 * orig_rq->rq_disk may be different from
		 * bio->bi_bdev->bd_disk if orig_rq got here through
		 * remapping drivers.  Make sure rq->rq_disk points
		 * to the same one as orig_rq.
		 */
		rq->rq_disk = orig_rq->rq_disk;
		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
		rq->end_io = flush_data_end_io;
		break;
	case QUEUE_FSEQ_POSTFLUSH:
		init_flush_request(rq, orig_rq->rq_disk);
		rq->end_io = post_flush_end_io;
		break;
	default:
		BUG();
	}

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
	return rq;
}
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
	unsigned int fflags = q->flush_flags; /* may change, cache it */
	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
	unsigned skip = 0;

	/*
	 * Special case.  If there's data but flush is not necessary,
	 * the request can be issued directly.
	 *
	 * Flush w/o data should be able to be issued directly too but
	 * currently some drivers assume that rq->bio contains
	 * non-zero data if it isn't NULL and empty FLUSH requests
	 * getting here usually have bio's without data.
	 */
	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
		rq->cmd_flags &= ~REQ_FLUSH;
		if (!has_fua)
			rq->cmd_flags &= ~REQ_FUA;
		return rq;
	}

	/*
	 * Sequenced flushes can't be processed in parallel.  If
	 * another one is already in progress, queue for later
	 * processing.
	 */
	if (q->flush_seq) {
		list_move_tail(&rq->queuelist, &q->pending_flushes);
		return NULL;
	}

	/*
	 * Start a new flush sequence
	 */
	q->flush_err = 0;
	q->flush_seq |= QUEUE_FSEQ_STARTED;

	/* adjust FLUSH/FUA of the original request and stash it away */
	rq->cmd_flags &= ~REQ_FLUSH;
	if (!has_fua)
		rq->cmd_flags &= ~REQ_FUA;
	blk_dequeue_request(rq);
	q->orig_flush_rq = rq;

	/* skip unneeded sequences and return the first one */
	if (!do_preflush)
		skip |= QUEUE_FSEQ_PREFLUSH;
	if (!blk_rq_sectors(rq))
		skip |= QUEUE_FSEQ_DATA;
	if (!do_postflush)
		skip |= QUEUE_FSEQ_POSTFLUSH;
	return blk_flush_complete_seq(q, skip, 0);
}
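/*
 * Worked example (illustrative only): suppose the queue advertises
 * REQ_FLUSH but not REQ_FUA and a REQ_FLUSH|REQ_FUA write with data
 * arrives.  Then do_preflush and do_postflush are both true, skip stays
 * 0, and the sequence runs PREFLUSH -> DATA -> POSTFLUSH.  If the queue
 * advertised REQ_FUA as well, do_postflush would be false, skip would
 * contain QUEUE_FSEQ_POSTFLUSH, and REQ_FUA would instead be left set
 * on the data write for the device to honor directly.
 */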
static void bio_end_flush(struct bio *bio, int err)
{
	if (err)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	if (bio->bi_private)
		complete(bio->bi_private);
	bio_put(bio);
}
/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question and wait for it to
 *    complete.  Caller can supply room for storing the error offset in
 *    case of a flush error, if they wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file) and so issuing a flush
	 * here will panic. Ensure there is a request function before issuing
	 * the flush.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	bio->bi_end_io = bio_end_flush;
	bio->bi_bdev = bdev;
	bio->bi_private = &wait;

	bio_get(bio);
	submit_bio(WRITE_FLUSH, bio);
	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be
	 * copied from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
);