/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include "compression.h"
/* Plan: call deflate() with avail_in == *sourcelen,
 * avail_out = *dstlen - 12 and flush == Z_FINISH.
 * If it doesn't manage to finish, call it again with
 * avail_in == 0 and avail_out set to the remaining 12
 * bytes for it to clean up.
 * Q: Is 12 bytes sufficient?
 */
#define STREAM_END_SPACE 12
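
/*
 * Illustration only (not part of the driver): the two-call plan above,
 * written against the kernel zlib wrappers.  Assumes a z_stream 's'
 * whose next_in/next_out already point at the source and destination
 * buffers; 'sourcelen' and 'dstlen' are hypothetical names taken from
 * the comment.
 *
 *	s.avail_in = sourcelen;
 *	s.avail_out = dstlen - STREAM_END_SPACE;
 *	ret = zlib_deflate(&s, Z_FINISH);
 *	if (ret == Z_OK) {
 *		// didn't finish: hand it the reserved 12 bytes
 *		s.avail_in = 0;
 *		s.avail_out = STREAM_END_SPACE;
 *		ret = zlib_deflate(&s, Z_FINISH);
 *	}
 *	// ret should now be Z_STREAM_END if dstlen was large enough
 */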
struct workspace {
	z_stream inf_strm;
	z_stream def_strm;
	char *buf;
	struct list_head list;
};

static LIST_HEAD(idle_workspace);
static DEFINE_SPINLOCK(workspace_lock);
static unsigned long num_workspace;
static atomic_t alloc_workspace = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
/*
 * this finds an available zlib workspace or allocates a new one
 * NULL or an ERR_PTR is returned if things go bad.
 */
static struct workspace *find_zlib_workspace(void)
{
	struct workspace *workspace;
	int ret;
	int cpus = num_online_cpus();

again:
	spin_lock(&workspace_lock);
	if (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		num_workspace--;
		spin_unlock(&workspace_lock);
		return workspace;
	}
	if (atomic_read(&alloc_workspace) > cpus) {
		DEFINE_WAIT(wait);

		spin_unlock(&workspace_lock);
		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&alloc_workspace) > cpus && !num_workspace)
			schedule();
		finish_wait(&workspace_wait, &wait);
		goto again;
	}
	atomic_inc(&alloc_workspace);
	spin_unlock(&workspace_lock);
	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
	if (!workspace) {
		ret = -ENOMEM;
		goto fail;
	}

	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	if (!workspace->def_strm.workspace) {
		ret = -ENOMEM;
		goto fail;
	}
	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
	if (!workspace->inf_strm.workspace) {
		ret = -ENOMEM;
		goto fail_inflate;
	}
	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
	if (!workspace->buf) {
		ret = -ENOMEM;
		goto fail_kmalloc;
	}
	return workspace;

fail_kmalloc:
	vfree(workspace->inf_strm.workspace);
fail_inflate:
	vfree(workspace->def_strm.workspace);
fail:
	kfree(workspace);
	atomic_dec(&alloc_workspace);
	wake_up(&workspace_wait);
	return ERR_PTR(ret);
}
/*
 * put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around
 */
static int free_workspace(struct workspace *workspace)
{
	spin_lock(&workspace_lock);
	if (num_workspace < num_online_cpus()) {
		list_add_tail(&workspace->list, &idle_workspace);
		num_workspace++;
		spin_unlock(&workspace_lock);
		if (waitqueue_active(&workspace_wait))
			wake_up(&workspace_wait);
		return 0;
	}
	spin_unlock(&workspace_lock);
	vfree(workspace->def_strm.workspace);
	vfree(workspace->inf_strm.workspace);
	kfree(workspace->buf);
	kfree(workspace);

	atomic_dec(&alloc_workspace);
	if (waitqueue_active(&workspace_wait))
		wake_up(&workspace_wait);
	return 0;
}
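
/*
 * A sketch of how the two helpers above pair up in a caller
 * (illustration only, not a real btrfs call site):
 *
 *	struct workspace *workspace;
 *
 *	workspace = find_zlib_workspace();
 *	if (IS_ERR(workspace))
 *		return PTR_ERR(workspace);
 *	// use workspace->def_strm / workspace->inf_strm / workspace->buf
 *	// for a single compress or decompress operation
 *	free_workspace(workspace);
 *
 * free_workspace() either parks the struct on idle_workspace for reuse
 * or frees it outright once enough workspaces are already idle.
 */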
/*
 * cleanup function for module exit
 */
static void free_workspaces(void)
{
	struct workspace *workspace;

	while (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);
		atomic_dec(&alloc_workspace);
	}
}
/*
 * given an address space and start/len, compress the bytes.
 *
 * pages are allocated to hold the compressed result and stored
 * in 'pages'
 *
 * out_pages is used to return the number of pages allocated.  There
 * may be pages allocated even if we return an error
 *
 * total_in is used to return the number of bytes actually read.  It
 * may be smaller than len if we had to exit early because we
 * ran out of room in the pages array or because we crossed the
 * max_out threshold.
 *
 * total_out is used to return the total number of compressed bytes
 *
 * max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 */
int btrfs_zlib_compress_pages(struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out)
{
	int ret;
	struct workspace *workspace;
	char *data_in;
	char *cpage_out;
	int nr_pages = 0;
	struct page *in_page = NULL;
	struct page *out_page = NULL;
	unsigned long bytes_left;

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;
	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -1;

	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
		printk(KERN_WARNING "deflateInit failed\n");
		ret = -1;
		goto out;
	}

	workspace->def_strm.total_in = 0;
	workspace->def_strm.total_out = 0;

	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
	data_in = kmap(in_page);

	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	cpage_out = kmap(out_page);
	pages[0] = out_page;
	nr_pages = 1;

	workspace->def_strm.next_in = data_in;
	workspace->def_strm.next_out = cpage_out;
	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
	while (workspace->def_strm.total_in < len) {
		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
		if (ret != Z_OK) {
			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
			       ret);
			zlib_deflateEnd(&workspace->def_strm);
			ret = -1;
			goto out;
		}
		/* we're making it bigger, give up */
		if (workspace->def_strm.total_in > 8192 &&
		    workspace->def_strm.total_in <
		    workspace->def_strm.total_out) {
			ret = -1;
			goto out;
		}
		/* we need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required
		 */
		if (workspace->def_strm.avail_out == 0) {
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				out_page = NULL;
				ret = -1;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			cpage_out = kmap(out_page);
			pages[nr_pages] = out_page;
			nr_pages++;
			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
			workspace->def_strm.next_out = cpage_out;
		}
		/* we're all done */
		if (workspace->def_strm.total_in >= len)
			break;

		/* we've read in a full page, get a new one */
		if (workspace->def_strm.avail_in == 0) {
			if (workspace->def_strm.total_out > max_out)
				break;

			bytes_left = len - workspace->def_strm.total_in;
			kunmap(in_page);
			page_cache_release(in_page);

			start += PAGE_CACHE_SIZE;
			in_page = find_get_page(mapping,
						start >> PAGE_CACHE_SHIFT);
			data_in = kmap(in_page);
			workspace->def_strm.avail_in = min(bytes_left,
							   PAGE_CACHE_SIZE);
			workspace->def_strm.next_in = data_in;
		}
	}
	workspace->def_strm.avail_in = 0;
	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
	zlib_deflateEnd(&workspace->def_strm);

	if (ret != Z_STREAM_END) {
		ret = -1;
		goto out;
	}

	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
		ret = -1;
		goto out;
	}

	ret = 0;
	*total_out = workspace->def_strm.total_out;
	*total_in = workspace->def_strm.total_in;
out:
	*out_pages = nr_pages;
	if (out_page)
		kunmap(out_page);

	if (in_page) {
		kunmap(in_page);
		page_cache_release(in_page);
	}
	free_workspace(workspace);
	return ret;
}
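
/*
 * Hypothetical caller, for illustration only (the real caller lives
 * elsewhere in btrfs; the values here are made up):
 *
 *	unsigned long out_pages, total_in, total_out;
 *	struct page *pages[16];
 *	int ret;
 *
 *	ret = btrfs_zlib_compress_pages(inode->i_mapping, start, len,
 *					pages, 16, &out_pages,
 *					&total_in, &total_out, len - 1);
 *	// on success pages[0..out_pages-1] hold total_out compressed
 *	// bytes covering the first total_in bytes at 'start'; passing
 *	// max_out < len makes compression bail out unless it actually
 *	// saves space
 */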
/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * bvec is a bio_vec of pages from the file that we want to decompress into
 *
 * vcnt is the count of pages in the biovec
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 */
int btrfs_zlib_decompress_biovec(struct page **pages_in,
			      u64 disk_start,
			      struct bio_vec *bvec,
			      int vcnt,
			      size_t srclen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	char *data_in;
	size_t total_out = 0;
	unsigned long page_bytes_left;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	struct page *page_out;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
				       PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;
	unsigned long start_byte;
	unsigned long current_buf_start;
	char *kaddr;
	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	data_in = kmap(pages_in[page_in_index]);
	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.total_out = 0;
	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	page_out = bvec[page_out_index].bv_page;
	page_bytes_left = PAGE_CACHE_SIZE;
	pg_offset = 0;
	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}
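
	/*
	 * Worked example of the header test above: a common zlib header
	 * is 0x78 0x9c.  (0x78 & 0x0f) == 8 == Z_DEFLATED, the
	 * PRESET_DICT bit (0x20) of 0x9c is clear, and
	 * (0x78 << 8) + 0x9c == 30876 == 996 * 31, so the checksum
	 * test passes.  0x78 >> 4 == 7 gives wbits == -15, which asks
	 * zlib for raw deflate with a 32K window and no adler32
	 * verification.
	 */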
	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}
	while (workspace->inf_strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;
		/*
		 * buf_start is the byte offset, within the decompressed
		 * stream, of the start of our workspace buffer
		 */
		buf_start = total_out;

		/* total_out is the last byte of the workspace buffer */
		total_out = workspace->inf_strm.total_out;

		working_bytes = total_out - buf_start;

		/*
		 * start_byte is the first byte of the page we're currently
		 * copying into, relative to the start of the compressed data.
		 */
		start_byte = page_offset(page_out) - disk_start;
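
		/*
		 * Worked example (hypothetical numbers): if the compressed
		 * extent covers file bytes 131072..262143, disk_start is
		 * 131072.  A bio page at file offset 139264 gives
		 * start_byte = 139264 - 131072 = 8192, i.e. this page wants
		 * decompressed bytes 8192..12287 of the extent.
		 */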
		if (working_bytes == 0) {
			/* we didn't make progress in this inflate
			 * call, we're done
			 */
			if (ret != Z_STREAM_END)
				ret = -1;
			break;
		}
		/* we haven't yet hit data corresponding to this page */
		if (total_out <= start_byte)
			goto next;

		/*
		 * the start of the data we care about is offset into
		 * the middle of our working buffer
		 */
		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
			working_bytes -= buf_offset;
		} else {
			buf_offset = 0;
		}
		current_buf_start = buf_start;
		/* copy bytes from the working buffer into the pages */
		while (working_bytes > 0) {
			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, working_bytes);
			kaddr = kmap_atomic(page_out, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
			       bytes);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page_out);

			pg_offset += bytes;
			page_bytes_left -= bytes;
			buf_offset += bytes;
			working_bytes -= bytes;
			current_buf_start += bytes;
			/* check if we need to pick another page */
			if (page_bytes_left == 0) {
				page_out_index++;
				if (page_out_index >= vcnt) {
					ret = 0;
					goto done;
				}

				page_out = bvec[page_out_index].bv_page;
				pg_offset = 0;
				page_bytes_left = PAGE_CACHE_SIZE;
				start_byte = page_offset(page_out) -
					disk_start;

				/*
				 * make sure our new page is covered by this
				 * working buffer
				 */
				if (total_out <= start_byte)
					goto next;

				/* the next page in the biovec might not
				 * be adjacent to the last page, but it
				 * might still be found inside this working
				 * buffer.  bump our offset pointer
				 */
				if (total_out > start_byte &&
				    current_buf_start < start_byte) {
					buf_offset = start_byte - buf_start;
					working_bytes = total_out - start_byte;
					current_buf_start = buf_start +
						buf_offset;
				}
			}
		}
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

		if (workspace->inf_strm.avail_in == 0) {
			unsigned long tmp;

			kunmap(pages_in[page_in_index]);
			page_in_index++;
			if (page_in_index >= total_pages_in) {
				data_in = NULL;
				break;
			}
			data_in = kmap(pages_in[page_in_index]);
			workspace->inf_strm.next_in = data_in;
			tmp = srclen - workspace->inf_strm.total_in;
			workspace->inf_strm.avail_in = min(tmp,
							   PAGE_CACHE_SIZE);
		}
	}
	if (ret != Z_STREAM_END)
		ret = -1;
	else
		ret = 0;
done:
	zlib_inflateEnd(&workspace->inf_strm);
	if (data_in)
		kunmap(pages_in[page_in_index]);
out:
	free_workspace(workspace);
	return ret;
}
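
/*
 * Hypothetical caller sketch: given a compressed extent read into
 * pages_in[] and a bio built by readpages, something like
 *
 *	ret = btrfs_zlib_decompress_biovec(pages_in, disk_start,
 *					   bio->bi_io_vec, bio->bi_vcnt,
 *					   srclen);
 *
 * fills each bio page with the decompressed bytes for its file offset.
 */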
/*
 * a less complex decompression routine.  Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * start_byte tells us the offset into the compressed data we're interested in
 */
int btrfs_zlib_decompress(unsigned char *data_in,
			  struct page *dest_page,
			  unsigned long start_byte,
			  size_t srclen, size_t destlen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	unsigned long bytes_left = destlen;
	unsigned long total_out = 0;
	char *kaddr;
	if (destlen > PAGE_CACHE_SIZE)
		return -ENOMEM;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;
->inf_strm
.next_in
= data_in
;
552 workspace
->inf_strm
.avail_in
= srclen
;
553 workspace
->inf_strm
.total_in
= 0;
555 workspace
->inf_strm
.next_out
= workspace
->buf
;
556 workspace
->inf_strm
.avail_out
= PAGE_CACHE_SIZE
;
557 workspace
->inf_strm
.total_out
= 0;
	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}
	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}
	while (bytes_left > 0) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;
		unsigned long pg_offset = 0;
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->inf_strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (total_out == buf_start) {
			ret = -1;
			break;
		}
		if (total_out <= start_byte)
			goto next;

		if (total_out > start_byte && buf_start < start_byte)
			buf_offset = start_byte - buf_start;
		else
			buf_offset = 0;
		bytes = min(PAGE_CACHE_SIZE - pg_offset,
			    PAGE_CACHE_SIZE - buf_offset);
		bytes = min(bytes, bytes_left);

		kaddr = kmap_atomic(dest_page, KM_USER0);
		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
		kunmap_atomic(kaddr, KM_USER0);

		pg_offset += bytes;
		bytes_left -= bytes;
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	}
	if (ret != Z_STREAM_END && bytes_left != 0)
		ret = -1;
	else
		ret = 0;

	zlib_inflateEnd(&workspace->inf_strm);
out:
	free_workspace(workspace);
	return ret;
}
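
/*
 * Illustrative call (hypothetical values): to read back the page at
 * offset 4096 of an extent whose compressed bytes sit in 'cbuf'
 * (clen bytes, already mapped):
 *
 *	ret = btrfs_zlib_decompress(cbuf, dest_page, 4096,
 *				    clen, PAGE_CACHE_SIZE);
 *
 * Everything before start_byte is inflated and thrown away, so this
 * is only cheap when the compressed extent is small.
 */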
void btrfs_zlib_exit(void)
{
	free_workspaces();
}