ntfs-3g ver. 2009.11.14
[tomato.git] / release / src / router / ntfs-3g / libntfs-3g / compress.c
blob4db1826d3905f96cf65ca02f5e956ab1ad3810ce
1 /**
2 * compress.c - Compressed attribute handling code. Originated from the Linux-NTFS
3 * project.
5 * Copyright (c) 2004-2005 Anton Altaparmakov
6 * Copyright (c) 2004-2006 Szabolcs Szakacsits
7 * Copyright (c) 2005 Yura Pakhuchiy
8 * Copyright (c) 2009 Jean-Pierre Andre
10 * This program/include file is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as published
12 * by the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program/include file is distributed in the hope that it will be
16 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
17 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program (in the main directory of the NTFS-3G
22 * distribution in the file COPYING); if not, write to the Free Software
23 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 * A part of the compression algorithm is based on lzhuf.c whose header
26 * describes the roles of the original authors (with no apparent copyright
27 * notice, and according to http://home.earthlink.net/~neilbawd/pall.html
28 * this was put into public domain in 1988 by Haruhiko OKUMURA).
30 * LZHUF.C English version 1.0
31 * Based on Japanese version 29-NOV-1988
32 * LZSS coded by Haruhiko OKUMURA
33 * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
34 * Edited and translated to English by Kenji RIKITAKE
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
41 #ifdef HAVE_STDIO_H
42 #include <stdio.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47 #ifdef HAVE_STDLIB_H
48 #include <stdlib.h>
49 #endif
50 #ifdef HAVE_ERRNO_H
51 #include <errno.h>
52 #endif
54 #include "attrib.h"
55 #include "debug.h"
56 #include "volume.h"
57 #include "types.h"
58 #include "layout.h"
59 #include "runlist.h"
60 #include "compress.h"
61 #include "lcnalloc.h"
62 #include "logging.h"
63 #include "misc.h"
65 /**
66 * enum ntfs_compression_constants - constants used in the compression code
68 typedef enum {
69 /* Token types and access mask. */
70 NTFS_SYMBOL_TOKEN = 0,
71 NTFS_PHRASE_TOKEN = 1,
72 NTFS_TOKEN_MASK = 1,
74 /* Compression sub-block constants. */
75 NTFS_SB_SIZE_MASK = 0x0fff,
76 NTFS_SB_SIZE = 0x1000,
77 NTFS_SB_IS_COMPRESSED = 0x8000,
78 } ntfs_compression_constants;
80 #define THRESHOLD 3 /* minimal match length for compression */
81 #define NIL NTFS_SB_SIZE /* End of tree's node */
83 struct COMPRESS_CONTEXT {
84 const unsigned char *inbuf;
85 unsigned int len;
86 unsigned int nbt;
87 int match_position;
88 unsigned int match_length;
89 u16 lson[NTFS_SB_SIZE + 1];
90 u16 rson[NTFS_SB_SIZE + 257];
91 u16 dad[NTFS_SB_SIZE + 1];
92 } ;
95 * Initialize the match tree
98 static void ntfs_init_compress_tree(struct COMPRESS_CONTEXT *pctx)
100 int i;
102 for (i = NTFS_SB_SIZE + 1; i <= NTFS_SB_SIZE + 256; i++)
103 pctx->rson[i] = NIL; /* root */
104 for (i = 0; i < NTFS_SB_SIZE; i++)
105 pctx->dad[i] = NIL; /* node */
109 * Insert a new node into match tree for quickly locating
110 * further similar strings
113 static void ntfs_new_node (struct COMPRESS_CONTEXT *pctx,
114 unsigned int r)
116 unsigned int pp;
117 BOOL less;
118 BOOL done;
119 const unsigned char *key;
120 int c;
121 unsigned int mxi;
122 unsigned int mxl;
124 mxl = (1 << (16 - pctx->nbt)) + 2;
125 less = FALSE;
126 done = FALSE;
127 key = &pctx->inbuf[r];
128 pp = NTFS_SB_SIZE + 1 + key[0];
129 pctx->rson[r] = pctx->lson[r] = NIL;
130 pctx->match_length = 0;
131 do {
132 if (!less) {
133 if (pctx->rson[pp] != NIL)
134 pp = pctx->rson[pp];
135 else {
136 pctx->rson[pp] = r;
137 pctx->dad[r] = pp;
138 done = TRUE;
140 } else {
141 if (pctx->lson[pp] != NIL)
142 pp = pctx->lson[pp];
143 else {
144 pctx->lson[pp] = r;
145 pctx->dad[r] = pp;
146 done = TRUE;
149 if (!done) {
150 register unsigned int i;
151 register const unsigned char *p1,*p2;
153 i = 1;
154 p1 = key;
155 p2 = &pctx->inbuf[pp];
156 mxi = NTFS_SB_SIZE - r;
157 do {
158 } while ((p1[i] == p2[i]) && (++i < mxi));
159 less = (i < mxi) && (p1[i] < p2[i]);
160 if (i >= THRESHOLD) {
161 if (i > pctx->match_length) {
162 pctx->match_position =
163 r - pp + 2*NTFS_SB_SIZE - 1;
164 if ((pctx->match_length = i) > mxl) {
165 i = pctx->rson[pp];
166 pctx->rson[r] = i;
167 pctx->dad[i] = r;
168 i = pctx->lson[pp];
169 pctx->lson[r] = i;
170 pctx->dad[i] = r;
171 i = pctx->dad[pp];
172 pctx->dad[r] = i;
173 if (pctx->rson[i] == pp)
174 pctx->rson[i] = r;
175 else
176 pctx->lson[i] = r;
177 pctx->dad[pp] = NIL; /* remove pp */
178 done = TRUE;
179 pctx->match_length = mxl;
181 } else
182 if ((i == pctx->match_length)
183 && ((c = (r - pp + 2*NTFS_SB_SIZE - 1))
184 < pctx->match_position))
185 pctx->match_position = c;
188 } while (!done);
192 * Search for the longest previous string matching the
193 * current one
195 * Returns the end of the longest current string which matched
196 * or zero if there was a bug
199 static unsigned int ntfs_nextmatch(struct COMPRESS_CONTEXT *pctx, unsigned int rr, int dd)
201 unsigned int bestlen = 0;
203 do {
204 rr++;
205 if (pctx->match_length > 0)
206 pctx->match_length--;
207 if (!pctx->len) {
208 ntfs_log_error("compress bug : void run\n");
209 goto bug;
211 if (--pctx->len) {
212 if (rr >= NTFS_SB_SIZE) {
213 ntfs_log_error("compress bug : buffer overflow\n");
214 goto bug;
216 if (((rr + bestlen) < NTFS_SB_SIZE)) {
217 while ((unsigned int)(1 << pctx->nbt) <= (rr - 1))
218 pctx->nbt++;
219 ntfs_new_node(pctx,rr);
220 if (pctx->match_length > bestlen)
221 bestlen = pctx->match_length;
222 } else
223 if (dd > 0) {
224 rr += dd;
225 if ((int)pctx->match_length > dd)
226 pctx->match_length -= dd;
227 else
228 pctx->match_length = 0;
229 if ((int)pctx->len < dd) {
230 ntfs_log_error("compress bug : run overflows\n");
231 goto bug;
233 pctx->len -= dd;
234 dd = 0;
237 } while (dd-- > 0);
238 return (rr);
239 bug :
240 return (0);
244 * Compress an input block
246 * Returns the size of the compressed block (including header)
247 * or zero if there was an error
250 static unsigned int ntfs_compress_block(const char *inbuf, unsigned int size, char *outbuf)
252 struct COMPRESS_CONTEXT *pctx;
253 char *ptag;
254 int dd;
255 unsigned int rr;
256 unsigned int last_match_length;
257 unsigned int q;
258 unsigned int xout;
259 unsigned int ntag;
261 pctx = (struct COMPRESS_CONTEXT*)malloc(sizeof(struct COMPRESS_CONTEXT));
262 if (pctx) {
263 pctx->inbuf = (const unsigned char*)inbuf;
264 ntfs_init_compress_tree(pctx);
265 xout = 2;
266 ntag = 0;
267 ptag = &outbuf[xout++];
268 *ptag = 0;
269 rr = 0;
270 pctx->nbt = 4;
271 pctx->len = size;
272 pctx->match_length = 0;
273 ntfs_new_node(pctx,0);
274 do {
275 if (pctx->match_length > pctx->len)
276 pctx->match_length = pctx->len;
277 if (pctx->match_length < THRESHOLD) {
278 pctx->match_length = 1;
279 if (ntag >= 8) {
280 ntag = 0;
281 ptag = &outbuf[xout++];
282 *ptag = 0;
284 outbuf[xout++] = inbuf[rr];
285 ntag++;
286 } else {
287 while ((unsigned int)(1 << pctx->nbt) <= (rr - 1))
288 pctx->nbt++;
289 q = (pctx->match_position << (16 - pctx->nbt))
290 + pctx->match_length - THRESHOLD;
291 if (ntag >= 8) {
292 ntag = 0;
293 ptag = &outbuf[xout++];
294 *ptag = 0;
296 *ptag |= 1 << ntag++;
297 outbuf[xout++] = q & 255;
298 outbuf[xout++] = (q >> 8) & 255;
300 last_match_length = pctx->match_length;
301 dd = last_match_length;
302 if (dd-- > 0) {
303 rr = ntfs_nextmatch(pctx,rr,dd);
304 if (!rr)
305 goto bug;
308 * stop if input is exhausted or output has exceeded
309 * the maximum size. Two extra bytes have to be
310 * reserved in output buffer, as 3 bytes may be
311 * output in a loop.
313 } while ((pctx->len > 0)
314 && (rr < size) && (xout < (NTFS_SB_SIZE + 2)));
315 /* uncompressed must be full size, so accept if better */
316 if (xout < (NTFS_SB_SIZE + 2)) {
317 outbuf[0] = (xout - 3) & 255;
318 outbuf[1] = 0xb0 + (((xout - 3) >> 8) & 15);
319 } else {
320 memcpy(&outbuf[2],inbuf,size);
321 if (size < NTFS_SB_SIZE)
322 memset(&outbuf[size+2],0,NTFS_SB_SIZE - size);
323 outbuf[0] = 0xff;
324 outbuf[1] = 0x3f;
325 xout = NTFS_SB_SIZE + 2;
327 free(pctx);
328 } else {
329 xout = 0;
330 errno = ENOMEM;
332 return (xout); /* 0 for an error, > size if cannot compress */
333 bug :
334 return (0);
338 * ntfs_decompress - decompress a compression block into an array of pages
339 * @dest: buffer to which to write the decompressed data
340 * @dest_size: size of buffer @dest in bytes
341 * @cb_start: compression block to decompress
342 * @cb_size: size of compression block @cb_start in bytes
344 * This decompresses the compression block @cb_start into the destination
345 * buffer @dest.
347 * @cb_start is a pointer to the compression block which needs decompressing
348 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
350 * Return 0 if success or -EOVERFLOW on error in the compressed stream.
352 static int ntfs_decompress(u8 *dest, const u32 dest_size,
353 u8 *const cb_start, const u32 cb_size)
356 * Pointers into the compressed data, i.e. the compression block (cb),
357 * and the therein contained sub-blocks (sb).
359 u8 *cb_end = cb_start + cb_size; /* End of cb. */
360 u8 *cb = cb_start; /* Current position in cb. */
361 u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */
362 u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
363 /* Variables for uncompressed data / destination. */
364 u8 *dest_end = dest + dest_size; /* End of dest buffer. */
365 u8 *dest_sb_start; /* Start of current sub-block in dest. */
366 u8 *dest_sb_end; /* End of current sb in dest. */
367 /* Variables for tag and token parsing. */
368 u8 tag; /* Current tag. */
369 int token; /* Loop counter for the eight tokens in tag. */
371 ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
372 do_next_sb:
373 ntfs_log_debug("Beginning sub-block at offset = %d in the cb.\n",
374 (int)(cb - cb_start));
376 * Have we reached the end of the compression block or the end of the
377 * decompressed data? The latter can happen for example if the current
378 * position in the compression block is one byte before its end so the
379 * first two checks do not detect it.
381 if (cb == cb_end || !le16_to_cpup((le16*)cb) || dest == dest_end) {
382 ntfs_log_debug("Completed. Returning success (0).\n");
383 return 0;
385 /* Setup offset for the current sub-block destination. */
386 dest_sb_start = dest;
387 dest_sb_end = dest + NTFS_SB_SIZE;
388 /* Check that we are still within allowed boundaries. */
389 if (dest_sb_end > dest_end)
390 goto return_overflow;
391 /* Does the minimum size of a compressed sb overflow valid range? */
392 if (cb + 6 > cb_end)
393 goto return_overflow;
394 /* Setup the current sub-block source pointers and validate range. */
395 cb_sb_start = cb;
396 cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK)
397 + 3;
398 if (cb_sb_end > cb_end)
399 goto return_overflow;
400 /* Now, we are ready to process the current sub-block (sb). */
401 if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
402 ntfs_log_debug("Found uncompressed sub-block.\n");
403 /* This sb is not compressed, just copy it into destination. */
404 /* Advance source position to first data byte. */
405 cb += 2;
406 /* An uncompressed sb must be full size. */
407 if (cb_sb_end - cb != NTFS_SB_SIZE)
408 goto return_overflow;
409 /* Copy the block and advance the source position. */
410 memcpy(dest, cb, NTFS_SB_SIZE);
411 cb += NTFS_SB_SIZE;
412 /* Advance destination position to next sub-block. */
413 dest += NTFS_SB_SIZE;
414 goto do_next_sb;
416 ntfs_log_debug("Found compressed sub-block.\n");
417 /* This sb is compressed, decompress it into destination. */
418 /* Forward to the first tag in the sub-block. */
419 cb += 2;
420 do_next_tag:
421 if (cb == cb_sb_end) {
422 /* Check if the decompressed sub-block was not full-length. */
423 if (dest < dest_sb_end) {
424 int nr_bytes = dest_sb_end - dest;
426 ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
427 /* Zero remainder and update destination position. */
428 memset(dest, 0, nr_bytes);
429 dest += nr_bytes;
431 /* We have finished the current sub-block. */
432 goto do_next_sb;
434 /* Check we are still in range. */
435 if (cb > cb_sb_end || dest > dest_sb_end)
436 goto return_overflow;
437 /* Get the next tag and advance to first token. */
438 tag = *cb++;
439 /* Parse the eight tokens described by the tag. */
440 for (token = 0; token < 8; token++, tag >>= 1) {
441 u16 lg, pt, length, max_non_overlap;
442 register u16 i;
443 u8 *dest_back_addr;
445 /* Check if we are done / still in range. */
446 if (cb >= cb_sb_end || dest > dest_sb_end)
447 break;
448 /* Determine token type and parse appropriately.*/
449 if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
451 * We have a symbol token, copy the symbol across, and
452 * advance the source and destination positions.
454 *dest++ = *cb++;
455 /* Continue with the next token. */
456 continue;
459 * We have a phrase token. Make sure it is not the first tag in
460 * the sb as this is illegal and would confuse the code below.
462 if (dest == dest_sb_start)
463 goto return_overflow;
465 * Determine the number of bytes to go back (p) and the number
466 * of bytes to copy (l). We use an optimized algorithm in which
467 * we first calculate log2(current destination position in sb),
468 * which allows determination of l and p in O(1) rather than
469 * O(n). We just need an arch-optimized log2() function now.
471 lg = 0;
472 for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
473 lg++;
474 /* Get the phrase token into i. */
475 pt = le16_to_cpup((le16*)cb);
477 * Calculate starting position of the byte sequence in
478 * the destination using the fact that p = (pt >> (12 - lg)) + 1
479 * and make sure we don't go too far back.
481 dest_back_addr = dest - (pt >> (12 - lg)) - 1;
482 if (dest_back_addr < dest_sb_start)
483 goto return_overflow;
484 /* Now calculate the length of the byte sequence. */
485 length = (pt & (0xfff >> lg)) + 3;
486 /* Verify destination is in range. */
487 if (dest + length > dest_sb_end)
488 goto return_overflow;
489 /* The number of non-overlapping bytes. */
490 max_non_overlap = dest - dest_back_addr;
491 if (length <= max_non_overlap) {
492 /* The byte sequence doesn't overlap, just copy it. */
493 memcpy(dest, dest_back_addr, length);
494 /* Advance destination pointer. */
495 dest += length;
496 } else {
498 * The byte sequence does overlap, copy non-overlapping
499 * part and then do a slow byte by byte copy for the
500 * overlapping part. Also, advance the destination
501 * pointer.
503 memcpy(dest, dest_back_addr, max_non_overlap);
504 dest += max_non_overlap;
505 dest_back_addr += max_non_overlap;
506 length -= max_non_overlap;
507 while (length--)
508 *dest++ = *dest_back_addr++;
510 /* Advance source position and continue with the next token. */
511 cb += 2;
513 /* No tokens left in the current tag. Continue with the next tag. */
514 goto do_next_tag;
515 return_overflow:
516 errno = EOVERFLOW;
517 ntfs_log_perror("Failed to decompress file");
518 return -1;
522 * ntfs_is_cb_compressed - internal function, do not use
524 * This is a very specialised function determining if a cb is compressed or
525 * uncompressed. It is assumed that checking for a sparse cb has already been
526 * performed and that the cb is not sparse. It makes all sorts of other
527 * assumptions as well and hence it is not useful anywhere other than where it
528 * is used at the moment. Please, do not make this function available for use
529 * outside of compress.c as it is bound to confuse people and not do what they
530 * want.
532 * Return TRUE on errors so that the error will be detected later on in the
533 * code. Might be a bit confusing to debug but there really should never be
534 * errors coming from here.
536 static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl,
537 VCN cb_start_vcn, int cb_clusters)
540 * The simplest case: the run starting at @cb_start_vcn contains
541 * @cb_clusters clusters which are all not sparse, thus the cb is not
542 * compressed.
544 restart:
545 cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
546 while (cb_clusters > 0) {
547 /* Go to the next run. */
548 rl++;
549 /* Map the next runlist fragment if it is not mapped. */
550 if (rl->lcn < LCN_HOLE || !rl->length) {
551 cb_start_vcn = rl->vcn;
552 rl = ntfs_attr_find_vcn(na, rl->vcn);
553 if (!rl || rl->lcn < LCN_HOLE || !rl->length)
554 return TRUE;
556 * If the runs were merged need to deal with the
557 * resulting partial run so simply restart.
559 if (rl->vcn < cb_start_vcn)
560 goto restart;
562 /* If the current run is sparse, the cb is compressed. */
563 if (rl->lcn == LCN_HOLE)
564 return TRUE;
565 /* If the whole cb is not sparse, it is not compressed. */
566 if (rl->length >= cb_clusters)
567 return FALSE;
568 cb_clusters -= rl->length;
570 /* All cb_clusters were not sparse thus the cb is not compressed. */
571 return FALSE;
575 * ntfs_compressed_attr_pread - read from a compressed attribute
576 * @na: ntfs attribute to read from
577 * @pos: byte position in the attribute to begin reading from
578 * @count: number of bytes to read
579 * @b: output data buffer
581 * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead.
583 * This function will read @count bytes starting at offset @pos from the
584 * compressed ntfs attribute @na into the data buffer @b.
586 * On success, return the number of successfully read bytes. If this number
587 * is lower than @count this means that the read reached end of file or that
588 * an error was encountered during the read so that the read is partial.
589 * 0 means end of file or nothing was read (also return 0 when @count is 0).
591 * On error and nothing has been read, return -1 with errno set appropriately
592 * to the return code of ntfs_pread(), or to EINVAL in case of invalid
593 * arguments.
595 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
597 s64 br, to_read, ofs, total, total2;
598 u64 cb_size_mask;
599 VCN start_vcn, vcn, end_vcn;
600 ntfs_volume *vol;
601 runlist_element *rl;
602 u8 *dest, *cb, *cb_pos, *cb_end;
603 u32 cb_size;
604 int err;
605 ATTR_FLAGS data_flags;
606 FILE_ATTR_FLAGS compression;
607 unsigned int nr_cbs, cb_clusters;
609 ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
610 (unsigned long long)na->ni->mft_no, na->type,
611 (long long)pos, (long long)count);
612 data_flags = na->data_flags;
613 compression = na->ni->flags & FILE_ATTR_COMPRESSED;
614 if (!na || !na->ni || !na->ni->vol || !b
615 || ((data_flags & ATTR_COMPRESSION_MASK)
616 != ATTR_IS_COMPRESSED)
617 || pos < 0 || count < 0) {
618 errno = EINVAL;
619 return -1;
622 * Encrypted attributes are not supported. We return access denied,
623 * which is what Windows NT4 does, too.
625 if (NAttrEncrypted(na)) {
626 errno = EACCES;
627 return -1;
629 if (!count)
630 return 0;
631 /* Truncate reads beyond end of attribute. */
632 if (pos + count > na->data_size) {
633 if (pos >= na->data_size) {
634 return 0;
636 count = na->data_size - pos;
638 /* If it is a resident attribute, simply use ntfs_attr_pread(). */
639 if (!NAttrNonResident(na))
640 return ntfs_attr_pread(na, pos, count, b);
641 total = total2 = 0;
642 /* Zero out reads beyond initialized size. */
643 if (pos + count > na->initialized_size) {
644 if (pos >= na->initialized_size) {
645 memset(b, 0, count);
646 return count;
648 total2 = pos + count - na->initialized_size;
649 count -= total2;
650 memset((u8*)b + count, 0, total2);
652 vol = na->ni->vol;
653 cb_size = na->compression_block_size;
654 cb_size_mask = cb_size - 1UL;
655 cb_clusters = na->compression_block_clusters;
657 /* Need a temporary buffer for each loaded compression block. */
658 cb = (u8*)ntfs_malloc(cb_size);
659 if (!cb)
660 return -1;
662 /* Need a temporary buffer for each uncompressed block. */
663 dest = (u8*)ntfs_malloc(cb_size);
664 if (!dest) {
665 free(cb);
666 return -1;
669 * The first vcn in the first compression block (cb) which we need to
670 * decompress.
672 start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
673 /* Offset in the uncompressed cb at which to start reading data. */
674 ofs = pos & cb_size_mask;
676 * The first vcn in the cb after the last cb which we need to
677 * decompress.
679 end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
680 vol->cluster_size_bits;
681 /* Number of compression blocks (cbs) in the wanted vcn range. */
682 nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
683 na->compression_block_size_bits;
684 cb_end = cb + cb_size;
685 do_next_cb:
686 nr_cbs--;
687 cb_pos = cb;
688 vcn = start_vcn;
689 start_vcn += cb_clusters;
691 /* Check whether the compression block is sparse. */
692 rl = ntfs_attr_find_vcn(na, vcn);
693 if (!rl || rl->lcn < LCN_HOLE) {
694 free(cb);
695 free(dest);
696 if (total)
697 return total;
698 /* FIXME: Do we want EIO or the error code? (AIA) */
699 errno = EIO;
700 return -1;
702 if (rl->lcn == LCN_HOLE) {
703 /* Sparse cb, zero out destination range overlapping the cb. */
704 ntfs_log_debug("Found sparse compression block.\n");
705 to_read = min(count, cb_size - ofs);
706 memset(b, 0, to_read);
707 ofs = 0;
708 total += to_read;
709 count -= to_read;
710 b = (u8*)b + to_read;
711 } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
712 s64 tdata_size, tinitialized_size;
714 * Uncompressed cb, read it straight into the destination range
715 * overlapping the cb.
717 ntfs_log_debug("Found uncompressed compression block.\n");
719 * Read the uncompressed data into the destination buffer.
720 * NOTE: We cheat a little bit here by marking the attribute as
721 * not compressed in the ntfs_attr structure so that we can
722 * read the data by simply using ntfs_attr_pread(). (-8
723 * NOTE: we have to modify data_size and initialized_size
724 * temporarily as well...
726 to_read = min(count, cb_size - ofs);
727 ofs += vcn << vol->cluster_size_bits;
728 NAttrClearCompressed(na);
729 na->data_flags &= ~ATTR_COMPRESSION_MASK;
730 tdata_size = na->data_size;
731 tinitialized_size = na->initialized_size;
732 na->data_size = na->initialized_size = na->allocated_size;
733 do {
734 br = ntfs_attr_pread(na, ofs, to_read, b);
735 if (br < 0) {
736 err = errno;
737 na->data_size = tdata_size;
738 na->initialized_size = tinitialized_size;
739 na->ni->flags |= compression;
740 na->data_flags = data_flags;
741 free(cb);
742 free(dest);
743 if (total)
744 return total;
745 errno = err;
746 return br;
748 total += br;
749 count -= br;
750 b = (u8*)b + br;
751 to_read -= br;
752 ofs += br;
753 } while (to_read > 0);
754 na->data_size = tdata_size;
755 na->initialized_size = tinitialized_size;
756 na->ni->flags |= compression;
757 na->data_flags = data_flags;
758 ofs = 0;
759 } else {
760 s64 tdata_size, tinitialized_size;
763 * Compressed cb, decompress it into the temporary buffer, then
764 * copy the data to the destination range overlapping the cb.
766 ntfs_log_debug("Found compressed compression block.\n");
768 * Read the compressed data into the temporary buffer.
769 * NOTE: We cheat a little bit here by marking the attribute as
770 * not compressed in the ntfs_attr structure so that we can
771 * read the raw, compressed data by simply using
772 * ntfs_attr_pread(). (-8
773 * NOTE: We have to modify data_size and initialized_size
774 * temporarily as well...
776 to_read = cb_size;
777 NAttrClearCompressed(na);
778 na->data_flags &= ~ATTR_COMPRESSION_MASK;
779 tdata_size = na->data_size;
780 tinitialized_size = na->initialized_size;
781 na->data_size = na->initialized_size = na->allocated_size;
782 do {
783 br = ntfs_attr_pread(na,
784 (vcn << vol->cluster_size_bits) +
785 (cb_pos - cb), to_read, cb_pos);
786 if (br < 0) {
787 err = errno;
788 na->data_size = tdata_size;
789 na->initialized_size = tinitialized_size;
790 na->ni->flags |= compression;
791 na->data_flags = data_flags;
792 free(cb);
793 free(dest);
794 if (total)
795 return total;
796 errno = err;
797 return br;
799 cb_pos += br;
800 to_read -= br;
801 } while (to_read > 0);
802 na->data_size = tdata_size;
803 na->initialized_size = tinitialized_size;
804 na->ni->flags |= compression;
805 na->data_flags = data_flags;
806 /* Just a precaution. */
807 if (cb_pos + 2 <= cb_end)
808 *(u16*)cb_pos = 0;
809 ntfs_log_debug("Successfully read the compression block.\n");
810 if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
811 err = errno;
812 free(cb);
813 free(dest);
814 if (total)
815 return total;
816 errno = err;
817 return -1;
819 to_read = min(count, cb_size - ofs);
820 memcpy(b, dest + ofs, to_read);
821 total += to_read;
822 count -= to_read;
823 b = (u8*)b + to_read;
824 ofs = 0;
826 /* Do we have more work to do? */
827 if (nr_cbs)
828 goto do_next_cb;
829 /* We no longer need the buffers. */
830 free(cb);
831 free(dest);
832 /* Return number of bytes read. */
833 return total + total2;
837 * Read data from a set of clusters
839 * Returns the amount of data read
842 static u32 read_clusters(ntfs_volume *vol, const runlist_element *rl,
843 s64 offs, u32 to_read, char *inbuf)
845 u32 count;
846 int xgot;
847 u32 got;
848 s64 xpos;
849 BOOL first;
850 char *xinbuf;
851 const runlist_element *xrl;
853 got = 0;
854 xrl = rl;
855 xinbuf = inbuf;
856 first = TRUE;
857 do {
858 count = xrl->length << vol->cluster_size_bits;
859 xpos = xrl->lcn << vol->cluster_size_bits;
860 if (first) {
861 count -= offs;
862 xpos += offs;
864 if ((to_read - got) < count)
865 count = to_read - got;
866 xgot = ntfs_pread(vol->dev, xpos, count, xinbuf);
867 if (xgot == (int)count) {
868 got += count;
869 xpos += count;
870 xinbuf += count;
871 xrl++;
873 first = FALSE;
874 } while ((xgot == (int)count) && (got < to_read));
875 return (got);
879 * Write data to a set of clusters
881 * Returns the amount of data written
884 static int write_clusters(ntfs_volume *vol, const runlist_element *rl,
885 s64 offs, int to_write, const char *outbuf)
887 int count;
888 int put, xput;
889 s64 xpos;
890 BOOL first;
891 const char *xoutbuf;
892 const runlist_element *xrl;
894 put = 0;
895 xrl = rl;
896 xoutbuf = outbuf;
897 first = TRUE;
898 do {
899 count = xrl->length << vol->cluster_size_bits;
900 xpos = xrl->lcn << vol->cluster_size_bits;
901 if (first) {
902 count -= offs;
903 xpos += offs;
905 if ((to_write - put) < count)
906 count = to_write - put;
907 xput = ntfs_pwrite(vol->dev, xpos, count, xoutbuf);
908 if (xput == count) {
909 put += count;
910 xpos += count;
911 xoutbuf += count;
912 xrl++;
914 first = FALSE;
915 } while ((xput == count) && (put < to_write));
916 return (put);
921 * Compress and write a set of blocks
923 * returns the size actually written (rounded to a full cluster)
924 * or 0 if all zeroes (nothing is written)
925 * or -1 if could not compress (nothing is written)
926 * or -2 if there were an irrecoverable error (errno set)
929 static int ntfs_comp_set(ntfs_attr *na, runlist_element *rl,
930 s64 offs, unsigned int insz, const char *inbuf)
932 ntfs_volume *vol;
933 char *outbuf;
934 char *pbuf;
935 unsigned int compsz;
936 int written;
937 int rounded;
938 unsigned int clsz;
939 unsigned int p;
940 unsigned int sz;
941 unsigned int bsz;
942 BOOL fail;
943 BOOL allzeroes;
944 /* a single compressed zero */
945 static char onezero[] = { 0x01, 0xb0, 0x00, 0x00 } ;
946 /* a couple of compressed zeroes */
947 static char twozeroes[] = { 0x02, 0xb0, 0x00, 0x00, 0x00 } ;
948 /* more compressed zeroes, to be followed by some count */
949 static char morezeroes[] = { 0x03, 0xb0, 0x02, 0x00 } ;
951 vol = na->ni->vol;
952 written = -1; /* default return */
953 clsz = 1 << vol->cluster_size_bits;
954 /* may need 2 extra bytes per block and 2 more bytes */
955 outbuf = (char*)ntfs_malloc(na->compression_block_size
956 + 2*(na->compression_block_size/NTFS_SB_SIZE)
957 + 2);
958 if (outbuf) {
959 fail = FALSE;
960 compsz = 0;
961 allzeroes = TRUE;
962 for (p=0; (p<insz) && !fail; p+=NTFS_SB_SIZE) {
963 if ((p + NTFS_SB_SIZE) < insz)
964 bsz = NTFS_SB_SIZE;
965 else
966 bsz = insz - p;
967 pbuf = &outbuf[compsz];
968 sz = ntfs_compress_block(&inbuf[p],bsz,pbuf);
969 /* fail if all the clusters (or more) are needed */
970 if (!sz || ((compsz + sz + clsz + 2)
971 > na->compression_block_size))
972 fail = TRUE;
973 else {
974 if (allzeroes) {
975 /* check whether this is all zeroes */
976 switch (sz) {
977 case 4 :
978 allzeroes = !memcmp(
979 pbuf,onezero,4);
980 break;
981 case 5 :
982 allzeroes = !memcmp(
983 pbuf,twozeroes,5);
984 break;
985 case 6 :
986 allzeroes = !memcmp(
987 pbuf,morezeroes,4);
988 break;
989 default :
990 allzeroes = FALSE;
991 break;
994 compsz += sz;
997 if (!fail && !allzeroes) {
998 /* add a couple of null bytes, space has been checked */
999 outbuf[compsz++] = 0;
1000 outbuf[compsz++] = 0;
1001 /* write a full cluster, to avoid partial reading */
1002 rounded = ((compsz - 1) | (clsz - 1)) + 1;
1003 written = write_clusters(vol, rl, offs, rounded, outbuf);
1004 if (written != rounded) {
1005 // previously written text has been spoilt, should return a specific error
1006 ntfs_log_error("error writing compressed data\n");
1007 errno = EIO;
1008 written = -2;
1010 } else
1011 if (!fail)
1012 written = 0;
1013 free(outbuf);
1015 return (written);
1019 * Free unneeded clusters after compression
1021 * This generally requires an empty slot at the end of runlist,
1022 * but we do not want to reallocate the runlist here because
1023 * there are many pointers to it.
1024 * So the empty slot has to be reserved beforehand
1026 * Returns zero unless some error occurred (described by errno)
1029 static int ntfs_compress_free(ntfs_attr *na, runlist_element *rl,
1030 s64 used, s64 reserved)
1032 int freecnt;
1033 int usedcnt;
1034 int res;
1035 s64 freelcn;
1036 s64 freevcn;
1037 int freelength;
1038 BOOL mergeholes;
1039 BOOL beginhole;
1040 ntfs_volume *vol;
1041 runlist_element *freerl;
1043 res = -1; /* default return */
1044 vol = na->ni->vol;
1045 freecnt = (reserved - used) >> vol->cluster_size_bits;
1046 usedcnt = (reserved >> vol->cluster_size_bits) - freecnt;
1047 /* skip entries fully used, if any */
1048 while (rl->length && (rl->length < usedcnt)) {
1049 usedcnt -= rl->length;
1050 rl++;
1052 if (rl->length) {
1054 * Splitting the current allocation block requires
1055 * an extra runlist element to create the hole.
1056 * The required entry has been prereserved when
1057 * mapping the runlist.
1059 freelcn = rl->lcn + usedcnt;
1060 freevcn = rl->vcn + usedcnt;
1061 freelength = rl->length - usedcnt;
1062 /* new count of allocated clusters */
1063 rl->length = usedcnt; /* warning : can be zero */
1064 if (!((freevcn + freecnt)
1065 & (na->compression_block_clusters - 1))) {
1066 beginhole = !usedcnt && !rl->vcn;
1067 mergeholes = !usedcnt
1068 && rl[0].vcn
1069 && (rl[-1].lcn == LCN_HOLE);
1070 if (mergeholes) {
1071 freerl = rl;
1072 freerl->length = freecnt;
1073 } else
1074 freerl = ++rl;
1075 if ((freelength > 0)
1076 && !mergeholes
1077 && (usedcnt || beginhole)) {
1079 * move the unused part to the end. Doing so,
1080 * the vcn will be out of order. This does
1081 * not harm, the vcn are meaningless now, and
1082 * only the lcn are meaningful for freeing.
1084 /* locate current end */
1085 while (rl->length)
1086 rl++;
1087 /* new terminator relocated */
1088 rl[1].vcn = rl->vcn;
1089 rl[1].lcn = LCN_ENOENT;
1090 rl[1].length = 0;
1091 /* hole, currently allocated */
1092 rl->vcn = freevcn;
1093 rl->lcn = freelcn;
1094 rl->length = freelength;
1096 /* free the hole */
1097 res = ntfs_cluster_free_from_rl(vol,freerl);
1098 if (!res) {
1099 if (mergeholes) {
1100 /* merge with adjacent hole */
1101 freerl--;
1102 freerl->length += freecnt;
1103 } else {
1104 if (beginhole)
1105 freerl--;
1106 /* mark hole as free */
1107 freerl->lcn = LCN_HOLE;
1108 freerl->vcn = freevcn;
1109 freerl->length = freecnt;
1111 /* and set up the new end */
1112 freerl[1].lcn = LCN_ENOENT;
1113 freerl[1].vcn = freevcn + freecnt;
1114 freerl[1].length = 0;
1116 } else {
1117 ntfs_log_error("Bad end of a compression block set\n");
1118 errno = EIO;
1120 } else {
1121 ntfs_log_error("No cluster to free after compression\n");
1122 errno = EIO;
1124 return (res);
1128 * Read existing data, decompress and append buffer
1129 * Do nothing if something fails
1132 static int ntfs_read_append(ntfs_attr *na, const runlist_element *rl,
1133 s64 offs, u32 compsz, int pos,
1134 char *outbuf, s64 to_write, const void *b)
1136 int fail = 1;
1137 char *compbuf;
1138 u32 decompsz;
1139 u32 got;
1141 if (compsz == na->compression_block_size) {
1142 /* if the full block was requested, it was a hole */
1143 memset(outbuf,0,compsz);
1144 memcpy(&outbuf[pos],b,to_write);
1145 fail = 0;
1146 } else {
1147 compbuf = (char*)ntfs_malloc(compsz);
1148 if (compbuf) {
1149 /* must align to full block for decompression */
1150 decompsz = ((pos - 1) | (NTFS_SB_SIZE - 1)) + 1;
1151 got = read_clusters(na->ni->vol, rl, offs,
1152 compsz, compbuf);
1153 if ((got == compsz)
1154 && !ntfs_decompress((u8*)outbuf,decompsz,
1155 (u8*)compbuf,compsz)) {
1156 memcpy(&outbuf[pos],b,to_write);
1157 fail = 0;
1159 free(compbuf);
1162 return (fail);
1166 * Flush a full compression block
1168 * returns the size actually written (rounded to a full cluster)
1169 * or 0 if could not compress (and written uncompressed)
1170 * or -1 if there were an irrecoverable error (errno set)
1173 static int ntfs_flush(ntfs_attr *na, runlist_element *rl, s64 offs,
1174 const char *outbuf, int count, BOOL compress)
1176 int rounded;
1177 int written;
1178 int clsz;
1180 if (compress) {
1181 written = ntfs_comp_set(na, rl, offs, count, outbuf);
1182 if (written == -1)
1183 compress = FALSE;
1184 if ((written >= 0)
1185 && ntfs_compress_free(na,rl,offs + written,
1186 offs + na->compression_block_size))
1187 written = -1;
1189 if (!compress) {
1190 clsz = 1 << na->ni->vol->cluster_size_bits;
1191 rounded = ((count - 1) | (clsz - 1)) + 1;
1192 written = write_clusters(na->ni->vol, rl,
1193 offs, rounded, outbuf);
1194 if (written != rounded)
1195 written = -1;
1197 return (written);
1201 * Write some data to be compressed.
1202 * Compression only occurs when a few clusters (usually 16) are
1203 * full. When this occurs an extra runlist slot may be needed, so
1204 * it has to be reserved beforehand.
1206 * Returns the size of uncompressed data written,
1207 * or zero if an error occurred.
1208 * When the returned size is less than requested, new clusters have
1209 * to be allocated before the function is called again.
1212 s64 ntfs_compressed_pwrite(ntfs_attr *na, runlist_element *wrl, s64 wpos,
1213 s64 offs, s64 to_write, s64 rounded,
1214 const void *b, int compressed_part)
1216 ntfs_volume *vol;
1217 runlist_element *brl; /* entry containing the beginning of block */
1218 int compression_length;
1219 s64 written;
1220 s64 to_read;
1221 s64 roffs;
1222 s64 got;
1223 s64 start_vcn;
1224 s64 nextblock;
1225 u32 compsz;
1226 char *inbuf;
1227 char *outbuf;
1228 BOOL fail;
1229 BOOL done;
1230 BOOL compress;
1232 written = 0; /* default return */
1233 vol = na->ni->vol;
1234 compression_length = na->compression_block_clusters;
1235 compress = FALSE;
1236 done = FALSE;
1238 * Cannot accept writing beyond the current compression set
1239 * because when compression occurs, clusters are freed
1240 * and have to be reallocated.
1241 * (cannot happen with standard fuse 4K buffers)
1242 * Caller has to avoid this situation, or face consequences.
1244 nextblock = ((offs + (wrl->vcn << vol->cluster_size_bits))
1245 | (na->compression_block_size - 1)) + 1;
1246 if ((offs + to_write + (wrl->vcn << vol->cluster_size_bits))
1247 >= nextblock) {
1248 /* it is time to compress */
1249 compress = TRUE;
1250 /* only process what we can */
1251 to_write = rounded = nextblock
1252 - (offs + (wrl->vcn << vol->cluster_size_bits));
1254 start_vcn = 0;
1255 fail = FALSE;
1256 brl = wrl;
1257 roffs = 0;
1259 * If we are about to compress or we need to decompress
1260 * existing data, we have to process a full set of blocks.
1261 * So relocate the parameters to the beginning of allocation
1262 * containing the first byte of the set of blocks.
1264 if (compress || compressed_part) {
1265 /* find the beginning of block */
1266 start_vcn = (wrl->vcn + (offs >> vol->cluster_size_bits))
1267 & -compression_length;
1268 while (brl->vcn && (brl->vcn > start_vcn)) {
1269 /* jumping back a hole means big trouble */
1270 if (brl->lcn == (LCN)LCN_HOLE) {
1271 ntfs_log_error("jump back over a hole when appending\n");
1272 fail = TRUE;
1273 errno = EIO;
1275 brl--;
1276 offs += brl->length << vol->cluster_size_bits;
1278 roffs = (start_vcn - brl->vcn) << vol->cluster_size_bits;
1280 if (compressed_part && !fail) {
1282 * The set of compression blocks contains compressed data
1283 * (we are reopening an existing file to append to it)
1284 * Decompress the data and append
1286 compsz = compressed_part << vol->cluster_size_bits;
1287 // improve the needed size
1288 outbuf = (char*)ntfs_malloc(na->compression_block_size);
1289 if (outbuf) {
1290 to_read = offs - roffs;
1291 if (!ntfs_read_append(na, brl, roffs, compsz,
1292 to_read, outbuf, to_write, b)) {
1293 written = ntfs_flush(na, brl, roffs,
1294 outbuf, to_read + to_write, compress);
1295 if (written >= 0) {
1296 written = to_write;
1297 done = TRUE;
1300 free(outbuf);
1302 } else {
1303 if (compress && !fail) {
1305 * we are filling up a block, read the full set of blocks
1306 * and compress it
1308 inbuf = (char*)ntfs_malloc(na->compression_block_size);
1309 if (inbuf) {
1310 to_read = offs - roffs;
1311 if (to_read)
1312 got = read_clusters(vol, brl, roffs,
1313 to_read, inbuf);
1314 else
1315 got = 0;
1316 if (got == to_read) {
1317 memcpy(&inbuf[to_read],b,to_write);
1318 written = ntfs_comp_set(na, brl, roffs,
1319 to_read + to_write, inbuf);
1321 * if compression was not successful,
1322 * only write the part which was requested
1324 if ((written >= 0)
1325 /* free the unused clusters */
1326 && !ntfs_compress_free(na,brl,
1327 written + roffs,
1328 na->compression_block_size
1329 + roffs)) {
1330 done = TRUE;
1331 written = to_write;
1334 free(inbuf);
1337 if (!done) {
1339 * if the compression block is not full, or
1340 * if compression failed for whatever reason,
1341 * write uncompressed
1343 /* check we are not overflowing current allocation */
1344 if ((wpos + rounded)
1345 > ((wrl->lcn + wrl->length)
1346 << vol->cluster_size_bits)) {
1347 ntfs_log_error("writing on unallocated clusters\n");
1348 errno = EIO;
1349 } else {
1350 written = ntfs_pwrite(vol->dev, wpos,
1351 rounded, b);
1352 if (written == rounded)
1353 written = to_write;
1357 return (written);
1361 * Close a file written compressed.
1362 * This compresses the last partial compression block of the file.
1363 * An empty runlist slot has to be reserved beforehand.
1365 * Returns zero if closing is successful.
1368 int ntfs_compressed_close(ntfs_attr *na, runlist_element *wrl, s64 offs)
1370 ntfs_volume *vol;
1371 runlist_element *brl; /* entry containing the beginning of block */
1372 int compression_length;
1373 s64 written;
1374 s64 to_read;
1375 s64 roffs;
1376 s64 got;
1377 s64 start_vcn;
1378 char *inbuf;
1379 BOOL fail;
1380 BOOL done;
1382 vol = na->ni->vol;
1383 compression_length = na->compression_block_clusters;
1384 done = FALSE;
1386 * There generally is an uncompressed block at end of file,
1387 * read the full block and compress it
1389 inbuf = (char*)ntfs_malloc(na->compression_block_size);
1390 if (inbuf) {
1391 start_vcn = (wrl->vcn + (offs >> vol->cluster_size_bits))
1392 & -compression_length;
1393 to_read = offs + ((wrl->vcn - start_vcn) << vol->cluster_size_bits);
1394 brl = wrl;
1395 fail = FALSE;
1396 while (brl->vcn && (brl->vcn > start_vcn)) {
1397 if (brl->lcn == (LCN)LCN_HOLE) {
1398 ntfs_log_error("jump back over a hole when closing\n");
1399 fail = TRUE;
1400 errno = EIO;
1402 brl--;
1404 if (!fail) {
1405 /* roffs can be an offset from another uncomp block */
1406 roffs = (start_vcn - brl->vcn) << vol->cluster_size_bits;
1407 if (to_read) {
1408 got = read_clusters(vol, brl, roffs, to_read,
1409 inbuf);
1410 if (got == to_read) {
1411 written = ntfs_comp_set(na, brl, roffs,
1412 to_read, inbuf);
1413 if ((written >= 0)
1414 /* free the unused clusters */
1415 && !ntfs_compress_free(na,brl,
1416 written + roffs,
1417 na->compression_block_size + roffs)) {
1418 done = TRUE;
1419 } else
1420 /* if compression failed, leave uncompressed */
1421 if (written == -1)
1422 done = TRUE;
1424 } else
1425 done = TRUE;
1426 free(inbuf);
1429 return (!done);