2 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
3 * Copyright (C) 2016 CNEX Labs
4 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
5 * Matias Bjorling <matias@cnexlabs.com>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * Implementation of a physical block-device target for Open-channel SSDs.
18 * pblk-init.c - pblk's initialization.
/* Module-wide slab caches shared by all pblk instances; created in
 * pblk_init_global_caches() and destroyed in pblk_core_free().
 */
static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
				*pblk_w_rq_cache, *pblk_line_meta_cache;
/* Serializes global cache creation/teardown and instance init/exit. */
static DECLARE_RWSEM(pblk_lock);
/* bio set for cloning/splitting user bios; created in pblk_module_init(). */
struct bio_set *pblk_bio_set;
/* Route a user bio: reads are submitted directly, writes go through the
 * write cache. NOTE(review): fragment — the parameter list, several interior
 * statements and closing braces are missing from this chunk; do not edit
 * without the full source.
 */
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio);
		ret = pblk_submit_read(pblk, bio);
		/* cloned bio: the original still belongs to the caller */
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
		blk_queue_split(q, &bio);
	/* writes are buffered in the ring write buffer */
	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
/* make_request entry point registered in tt_pblk: handles DISCARD and
 * PREFLUSH specially, otherwise dispatches to pblk_rw_io().
 * NOTE(review): fragment — return paths and closing braces missing here.
 */
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
	struct pblk *pblk = q->queuedata;
	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		/* a pure discard without a flush needs no further handling */
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
	switch (pblk_rw_io(q, pblk, bio)) {
/* Release the logical-to-physical translation map (vmalloc'ed in
 * pblk_l2p_init()).
 */
static void pblk_l2p_free(struct pblk *pblk)
	vfree(pblk->trans_map);
/* Allocate the L2P table (one entry per sector managed by the rate limiter)
 * and initialize every entry to the "empty" PPA sentinel.
 * NOTE(review): fragment — entry_size selection and error handling missing
 * from this chunk (presumably entry_size depends on ppaf_bitsize < 32).
 */
static int pblk_l2p_init(struct pblk *pblk)
	if (pblk->ppaf_bitsize < 32)
	pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
	pblk_ppa_set_empty(&ppa);
	/* mark every logical sector as unmapped */
	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);
/* Tear down the ring write buffer: verify it is quiescent, free its data
 * pages and then the entry array allocated in pblk_rwb_init().
 */
static void pblk_rwb_free(struct pblk *pblk)
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pr_err("pblk: write buffer error on tear down\n");
	pblk_rb_data_free(&pblk->rwb);
	vfree(pblk_rb_entries_ref(&pblk->rwb));
/* Allocate and initialize the ring write buffer. Buffer size is derived
 * from pgs_in_buffer; both the entry count and the segment size are passed
 * to pblk_rb_init() as powers of two (get_count_order()).
 * NOTE(review): fragment — the vzalloc() failure check is missing here.
 */
static int pblk_rwb_init(struct pblk *pblk)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_rb_entry *entries;
	unsigned long nr_entries;
	unsigned int power_size, power_seg_sz;
	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(geo->sec_size);
	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64

/* Build the internal physical-address format (bit offsets and masks for
 * sector/plane/channel/LUN/page/block fields) from the device geometry.
 * Channel and LUN counts must be powers of two so the fields pack into
 * contiguous bit ranges. NOTE(review): fragment — error returns after the
 * pr_err() calls and closing braces are missing from this chunk.
 */
static int pblk_set_ppaf(struct pblk *pblk)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_addr_format ppaf = geo->ppaf;
	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->nr_chnls);
	if (1 << power_len != geo->nr_chnls) {
		pr_err("pblk: supports only power-of-two channel config.\n");
	ppaf.ch_len = power_len;
	power_len = get_count_order(geo->luns_per_chnl);
	if (1 << power_len != geo->luns_per_chnl) {
		pr_err("pblk: supports only power-of-two LUN config.\n");
	ppaf.lun_len = power_len;
	/* fields are packed LSB-first: sec, pln, ch, lun, pg, blk */
	pblk->ppaf.sec_offset = 0;
	pblk->ppaf.pln_offset = ppaf.sect_len;
	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
	/* per-field extraction masks, shifted into position */
	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
							pblk->ppaf.pln_offset;
	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
							pblk->ppaf.ch_offset;
	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
							pblk->ppaf.lun_offset;
	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
							pblk->ppaf.pg_offset;
	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
							pblk->ppaf.blk_offset;
	/* total number of significant bits in the packed address */
	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
/* Create the module-wide slab caches under pblk_lock. Each failure path
 * destroys the caches created so far before releasing the lock. The line
 * metadata cache name embeds the disk name, so it is per-instance.
 * NOTE(review): fragment — the return statements and some kmem_cache_create
 * flag arguments are missing from this chunk.
 */
static int pblk_init_global_caches(struct pblk *pblk)
	char cache_name[PBLK_CACHE_NAME_LEN];
	down_write(&pblk_lock);
	pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_blk_ws_cache) {
		up_write(&pblk_lock);
	pblk_rec_cache = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_rec_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		up_write(&pblk_lock);
	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
	if (!pblk_g_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		up_write(&pblk_lock);
	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
	if (!pblk_w_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		up_write(&pblk_lock);
	/* name is unique per target instance (includes disk name) */
	snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
							pblk->disk->disk_name);
	pblk_line_meta_cache = kmem_cache_create(cache_name,
				pblk->lm.sec_bitmap_len, 0, 0, NULL);
	if (!pblk_line_meta_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		kmem_cache_destroy(pblk_w_rq_cache);
		up_write(&pblk_lock);
	up_write(&pblk_lock);
/* Initialize per-instance core resources: global caches, mempools, the
 * close/bad-block workqueues, the PPA format and the write buffer. The
 * trailing destroy/free calls are the unwind ladder reached via the gotos.
 * NOTE(review): fragment — several error labels, checks and second
 * arguments to the mempool/workqueue constructors are missing here.
 */
static int pblk_core_init(struct pblk *pblk)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	/* write buffer capacity in flash pages, derived from geometry */
	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
						geo->nr_planes * geo->nr_luns;
	if (pblk_init_global_caches(pblk))
	/* internal bios can be at most the sectors signaled by the device. */
	pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev),
	if (!pblk->page_bio_pool)
	pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
	if (!pblk->line_ws_pool)
		goto free_page_bio_pool;
	pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
		goto free_blk_ws_pool;
	pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
	if (!pblk->g_rq_pool)
	/* two write requests per LUN may be in flight */
	pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
	if (!pblk->w_rq_pool)
	pblk->line_meta_pool =
			mempool_create_slab_pool(PBLK_META_POOL_SIZE,
							pblk_line_meta_cache);
	if (!pblk->line_meta_pool)
	pblk->close_wq = alloc_workqueue("pblk-close-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
		goto free_line_meta_pool;
	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (pblk_set_ppaf(pblk))
	if (pblk_rwb_init(pblk))
	INIT_LIST_HEAD(&pblk->compl_list);
	/* unwind ladder (labels missing from this fragment) */
	destroy_workqueue(pblk->bb_wq);
	destroy_workqueue(pblk->close_wq);
	mempool_destroy(pblk->line_meta_pool);
	mempool_destroy(pblk->w_rq_pool);
	mempool_destroy(pblk->g_rq_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->line_ws_pool);
	mempool_destroy(pblk->page_bio_pool);
/* Release everything pblk_core_init() set up: workqueues, mempools, and
 * the module-wide slab caches (reverse order of creation).
 */
static void pblk_core_free(struct pblk *pblk)
	destroy_workqueue(pblk->close_wq);
	destroy_workqueue(pblk->bb_wq);
	mempool_destroy(pblk->page_bio_pool);
	mempool_destroy(pblk->line_ws_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->g_rq_pool);
	mempool_destroy(pblk->w_rq_pool);
	mempool_destroy(pblk->line_meta_pool);
	kmem_cache_destroy(pblk_blk_ws_cache);
	kmem_cache_destroy(pblk_rec_cache);
	kmem_cache_destroy(pblk_g_rq_cache);
	kmem_cache_destroy(pblk_w_rq_cache);
	kmem_cache_destroy(pblk_line_meta_cache);
/* Free per-LUN state allocated in pblk_luns_init().
 * NOTE(review): fragment — the body is missing from this chunk.
 */
static void pblk_luns_free(struct pblk *pblk)
/* Free the two per-line bitmaps allocated in pblk_alloc_line_bitmaps(). */
static void pblk_free_line_bitmaps(struct pblk_line *line)
	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
/* Free every line and its bitmaps under the free-list lock.
 * NOTE(review): fragment — the final kfree of the lines array (if any) is
 * not visible in this chunk.
 */
static void pblk_lines_free(struct pblk *pblk)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];
		pblk_line_free(pblk, line);
		pblk_free_line_bitmaps(line);
	spin_unlock(&l_mg->free_lock);
/* Free line-management metadata: the bad-block template, the vsc list, and
 * the smeta/emeta buffers for each of the PBLK_DATA_LINES slots. emeta
 * buffers are released with pblk_mfree() because they may be kmalloc'ed or
 * vmalloc'ed depending on emeta_alloc_type.
 */
static void pblk_line_meta_free(struct pblk *pblk)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	kfree(l_mg->bb_template);
	kfree(l_mg->vsc_list);
	spin_lock(&l_mg->free_lock);
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		kfree(l_mg->sline_meta[i]);
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	spin_unlock(&l_mg->free_lock);
/* Query the device's bad-block table for one LUN and store the folded
 * (per-block) table in rlun->bb_list. Ownership of the blks buffer passes
 * to the rlun; callers free it via kfree(rlun->bb_list).
 * NOTE(review): fragment — allocation/return-value checks and the return
 * statement are missing from this chunk.
 */
static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
	struct nvm_geo *geo = &dev->geo;
	/* raw table has one entry per plane-block */
	nr_blks = geo->blks_per_lun * geo->plane_mode;
	blks = kmalloc(nr_blks, GFP_KERNEL);
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;
	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	/* collapse the per-plane entries into one entry per block */
	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	rlun->bb_list = blks;
/* Mark the bad blocks of a line in line->blk_bitmap by consulting each
 * LUN's bb_list, and (per the callers) return the bad-block count.
 * NOTE(review): fragment — the trailing parameter (blk_per_line), the
 * counter increment and return are missing from this chunk.
 */
static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	for (i = 0; i < blk_per_line; i++) {
		rlun = &pblk->luns[i];
		/* good blocks are skipped; everything else is marked bad */
		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
/* Allocate the per-line block and erase bitmaps (both blk_bitmap_len
 * bytes). On erase_bitmap failure the blk_bitmap is rolled back.
 * NOTE(review): fragment — the return statements are missing here.
 */
static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
	struct pblk_line_meta *lm = &pblk->lm;
	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
/* Allocate the per-LUN array and populate each entry, striping LUNs across
 * channels so that consecutive indices alternate channels, then run
 * bad-block discovery per LUN. The error path frees the bb_lists collected
 * so far. NOTE(review): fragment — allocation checks, error labels and the
 * return are missing from this chunk.
 */
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	/* TODO: Implement unbalanced LUN support */
	if (geo->luns_per_chnl < 0) {
		pr_err("pblk: unbalanced LUN config.\n");
	pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
	for (i = 0; i < geo->nr_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->nr_chnls;
		int lun_raw = i / geo->nr_chnls;
		int lunid = lun_raw + ch * geo->luns_per_chnl;
		rlun = &pblk->luns[i];
		rlun->bppa = luns[lunid];
		/* one write in flight per LUN */
		sema_init(&rlun->wr_sem, 1);
		ret = pblk_bb_discovery(dev, rlun);
		kfree(pblk->luns[i].bb_list);
/* On a non-factory start, recover the L2P state from media; then pick the
 * first data line for user writes. NOTE(review): fragment — the error
 * checks around the pr_err() calls and the return are missing here.
 */
static int pblk_lines_configure(struct pblk *pblk, int flags)
	struct pblk_line *line = NULL;
	if (!(flags & NVM_TARGET_FACTORY)) {
		line = pblk_recov_l2p(pblk);
			pr_err("pblk: could not recover l2p table\n");
	/* Configure next line for user data */
	line = pblk_line_get_first_data(pblk);
		pr_err("pblk: line list corrupted\n");
/* See comment over struct line_emeta definition */
/* Compute the end-of-line metadata footprint: emeta_sec/emeta_len slots
 * [1]=header+block bitmap, [2]=lba_list, [3]=vsc list — each rounded up to
 * whole sectors — and return their combined byte length.
 * NOTE(review): fragment — the divisor argument of each DIV_ROUND_UP
 * (presumably the sector size) is missing from this chunk.
 */
static unsigned int calc_emeta_len(struct pblk *pblk)
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	/* Round to sector size so that lba_list starts on its own sector */
	lm->emeta_sec[1] = DIV_ROUND_UP(
			sizeof(struct line_emeta) + lm->blk_bitmap_len,
	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
	/* Round to sector size so that vsc_list starts on its own sector */
	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
/* Derive user-visible capacity and rate-limiter bookkeeping from the free
 * block count, reserving over_pct percent for over-provisioning.
 */
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;
	/* provisioned = nr_free_blks * (100 - over_pct) / 100 */
	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->over_pct);
	sector_div(provisioned, 100);
	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
	pblk->capacity = provisioned * geo->sec_per_blk;
	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
/* Allocate smeta buffers, emeta descriptors+buffers (kmalloc or vmalloc
 * depending on emeta_len[0] vs KMALLOC_MAX_CACHE_SIZE), and the vsc list,
 * with goto-ladder cleanup on failure.
 * NOTE(review): fragment — null checks, else branch and labels are missing
 * here. Also NOTE(review): the visible unwind uses vfree() on
 * eline_meta[i]->buf unconditionally, even though the buffer may have been
 * kmalloc'ed (PBLK_KMALLOC_META) — confirm against the full source
 * (pblk_line_meta_free() uses pblk_mfree() for the same buffer).
 */
static int pblk_lines_alloc_metadata(struct pblk *pblk)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i])
			goto fail_free_smeta;
	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		struct pblk_emeta *emeta;
		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
			goto fail_free_emeta;
		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
			emeta->buf = vmalloc(lm->emeta_len[0]);
				goto fail_free_emeta;
			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
				goto fail_free_emeta;
			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
		goto fail_free_emeta;
	/* all lines start with the EMPTY_ENTRY valid-sector count */
	for (i = 0; i < l_mg->nr_lines; i++)
		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
	vfree(l_mg->eline_meta[i]->buf);
	kfree(l_mg->eline_meta[i]);
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);
/* Initialize line geometry and metadata sizing, build the bad-block
 * template, set up the line-management lists/locks, allocate and classify
 * every line (free vs bad), and finally set provisioning. The trailing
 * statements are the error-unwind ladder.
 * NOTE(review): fragment — many interior lines (smeta/emeta sizing loops
 * over i, allocation checks, second kcalloc argument, labels, returns) are
 * missing from this chunk; do not edit without the full source.
 */
static int pblk_lines_init(struct pblk *pblk)
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	unsigned int smeta_len, emeta_len;
	long nr_bad_blks, nr_free_blks;
	int bb_distance, max_write_ppas, mod;
	/* cap the write unit by what the device can accept in one command */
	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
				max_write_ppas : nvm_max_phys_sects(dev);
	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
		pr_err("pblk: cannot support device max_phys_sect\n");
	/* block size must be a multiple of the minimum write unit */
	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
		pr_err("pblk: bad configuration of sectors/pages\n");
	l_mg->nr_lines = geo->blks_per_lun;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	/* a line spans one block on every LUN */
	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
	lm->blk_per_line = geo->nr_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	/* GC thresholds on remaining valid sectors per line */
	lm->mid_thrs = lm->sec_per_line / 2;
	lm->high_thrs = lm->sec_per_line / 4;
	lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	lm->smeta_sec = i * geo->sec_per_pl;
	lm->smeta_len = lm->smeta_sec * geo->sec_size;
	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	lm->emeta_sec[0] = i * geo->sec_per_pl;
	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
	emeta_len = calc_emeta_len(pblk);
	if (emeta_len > lm->emeta_len[0]) {
	lm->emeta_bb = geo->nr_luns - i;
	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
	if (lm->min_blk_line > lm->blk_per_line) {
		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
	ret = pblk_lines_alloc_metadata(pblk);
	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template) {
	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
		goto fail_free_bb_template;
	/* pre-mark the first plane-group of each LUN in the template */
	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);
	INIT_LIST_HEAD(&l_mg->emeta_list);
	/* GC scans high -> mid -> low priority lists */
	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;
	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
	spin_lock_init(&l_mg->gc_lock);
	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
		goto fail_free_bb_aux;
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];
		line->type = PBLK_LINETYPE_FREE;
		line->state = PBLK_LINESTATE_FREE;
		line->gc_group = PBLK_LINEGC_NONE;
		line->vsc = &l_mg->vsc_list[i];
		spin_lock_init(&line->lock);
		ret = pblk_alloc_line_bitmaps(pblk, line);
			goto fail_free_lines;
		nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
			pblk_free_line_bitmaps(line);
			goto fail_free_lines;
		blk_in_line = lm->blk_per_line - nr_bad_blks;
		/* lines with too few usable blocks are retired as bad */
		if (blk_in_line < lm->min_blk_line) {
			line->state = PBLK_LINESTATE_BAD;
			list_add_tail(&line->list, &l_mg->bad_list);
		nr_free_blks += blk_in_line;
		atomic_set(&line->blk_in_line, blk_in_line);
		l_mg->nr_free_lines++;
		list_add_tail(&line->list, &l_mg->free_list);
	pblk_set_provision(pblk, nr_free_blks);
	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);
	/* error-unwind ladder (labels partially missing in this fragment) */
		pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_template:
	kfree(l_mg->bb_template);
	pblk_line_meta_free(pblk);
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);
/* Set up the write path: arm the write timer (100ms period) and create the
 * writer kthread (started later via wake_up_process() in pblk_init()).
 * NOTE(review): the timer is armed before the kthread is created — if
 * kthread_create() fails the timer appears to stay armed; confirm against
 * the full source whether the caller cancels it.
 */
static int pblk_writer_init(struct pblk *pblk)
	setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		pr_err("pblk: could not allocate writer kthread\n");
		return PTR_ERR(pblk->writer_ts);
/* Stop the writer kthread and its timer, warning if the write buffer still
 * holds unpersisted or unsynced entries.
 */
static void pblk_writer_stop(struct pblk *pblk)
	/* The pipeline must be stopped and the write buffer emptied before the
	 * write thread is stopped
	 */
	WARN(pblk_rb_read_count(&pblk->rwb),
			"Stopping not fully persisted write buffer\n");
	WARN(pblk_rb_sync_count(&pblk->rwb),
			"Stopping not fully synced write buffer\n");
	kthread_stop(pblk->writer_ts);
	del_timer(&pblk->wtimer);
/* Release all per-instance resources (LUNs, lines, line metadata, core). */
static void pblk_free(struct pblk *pblk)
	pblk_luns_free(pblk);
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);
	pblk_core_free(pblk);
/* Quiesce the instance: stop the pipeline and writer, flush the write
 * buffer's L2P updates, and release the rate limiter.
 */
static void pblk_tear_down(struct pblk *pblk)
	pblk_pipeline_stop(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_rl_free(&pblk->rl);
	pr_debug("pblk: consistent tear down\n");
/* Target exit hook: tear down the instance under the global pblk lock.
 * NOTE(review): fragment — the pblk_free() call (if any) between tear down
 * and unlock is not visible in this chunk.
 */
static void pblk_exit(void *private)
	struct pblk *pblk = private;
	down_write(&pblk_lock);
	pblk_tear_down(pblk);
	up_write(&pblk_lock);
/* Target capacity hook: report user capacity in 512B logical sectors
 * (NR_PHY_IN_LOG per physical sector).
 */
static sector_t pblk_capacity(void *private)
	struct pblk *pblk = private;
	return pblk->capacity * NR_PHY_IN_LOG;
/* Target init hook: allocate and initialize a pblk instance bound to the
 * target disk, wire up queue limits from the underlying device, and start
 * the writer thread. Returns the instance or an ERR_PTR.
 * NOTE(review): fragment — the flags parameter, allocation checks, several
 * error labels and the success return are missing from this chunk.
 */
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	/* pblk maintains its own L2P; a device-side table is incompatible */
	if (dev->identity.dom & NVM_RSP_L2P) {
		pr_err("pblk: device-side L2P table not supported. (%x)\n",
		return ERR_PTR(-EINVAL);
	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
		return ERR_PTR(-ENOMEM);
	pblk->state = PBLK_STATE_RUNNING;
	pblk->gc.gc_enabled = 0;
	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);
	if (flags & NVM_TARGET_FACTORY)
		pblk_setup_uuid(pblk);
#ifdef CONFIG_NVM_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->nr_flush, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->cache_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	/* NOTE(review): recov_writes is initialized twice; the second call
	 * likely was meant for a different counter (e.g. recov_reads) —
	 * confirm against struct pblk.
	 */
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
	atomic_long_set(&pblk->recov_gc_reads, 0);
	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);
	ret = pblk_luns_init(pblk, dev->luns);
		pr_err("pblk: could not initialize luns\n");
	ret = pblk_lines_init(pblk);
		pr_err("pblk: could not initialize lines\n");
	ret = pblk_core_init(pblk);
		pr_err("pblk: could not initialize core\n");
		goto fail_free_line_meta;
	ret = pblk_l2p_init(pblk);
		pr_err("pblk: could not initialize maps\n");
	ret = pblk_lines_configure(pblk, flags);
		pr_err("pblk: could not configure lines\n");
	ret = pblk_writer_init(pblk);
		pr_err("pblk: could not initialize write thread\n");
		goto fail_free_lines;
	ret = pblk_gc_init(pblk);
		pr_err("pblk: could not initialize gc\n");
		goto fail_stop_writer;
	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
	blk_queue_write_cache(tqueue, true, false);
	/* discard granularity is a full flash block */
	tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
	pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			geo->nr_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);
	wake_up_process(pblk->writer_ts);
	/* error-unwind ladder (labels partially missing in this fragment) */
	pblk_writer_stop(pblk);
	pblk_lines_free(pblk);
	pblk_l2p_free(pblk);
	pblk_core_free(pblk);
fail_free_line_meta:
	pblk_line_meta_free(pblk);
	pblk_luns_free(pblk);
	return ERR_PTR(ret);
/* physical block device target */
/* nvm_tgt_type descriptor registered with the lightnvm core.
 * NOTE(review): fragment — the name/init/exit members and closing brace
 * are missing from this chunk.
 */
static struct nvm_tgt_type tt_pblk = {
	.version	= {1, 0, 0},
	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,
	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
/* Module init: create the shared bio set and register the target type;
 * the bio set is freed if registration fails.
 * NOTE(review): fragment — null/ret checks and returns are missing here.
 */
static int __init pblk_module_init(void)
	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
	ret = nvm_register_tgt_type(&tt_pblk);
		bioset_free(pblk_bio_set);
/* Module exit: release the bio set and unregister the target type. */
static void pblk_module_exit(void)
	bioset_free(pblk_bio_set);
	nvm_unregister_tgt_type(&tt_pblk);
/* Standard kernel module registration and metadata. */
module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");