1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2019 Pierre Neidhardt <mail@ambrevar.xyz>
4 ;;; This file is part of GNU Guix.
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19 (define-module (guix lzlib)
20 #:use-module (rnrs bytevectors)
21 #:use-module (rnrs arithmetic bitwise)
22 #:use-module (ice-9 binary-ports)
23 #:use-module (ice-9 match)
24 #:use-module (system foreign)
25 #:use-module (guix config)
26 #:export (lzlib-available?
29 call-with-lzip-input-port
30 call-with-lzip-output-port
31 %default-member-length-limit
32 %default-compression-level))
36 ;;; Bindings to the lzlib / liblz API. Some convenience functions are also
37 ;;; provided (see the export).
39 ;;; While the bindings are complete, the convenience functions only support
40 ;;; single member archives. To decompress single member archives, we loop
41 ;;; until lz-decompress-read returns 0. This is simpler. To support multiple
42 ;;; members properly, we need (among others) to call lz-decompress-finish and
43 ;;; loop over lz-decompress-read until lz-decompress-finished? returns #t.
44 ;;; Otherwise a multi-member archive starting with an empty member would only
45 ;;; decompress the empty member and stop there, resulting in truncated output.
50 ;; File name of lzlib's shared library. When updating via 'guix pull',
51 ;; '%liblz' might be undefined so protect against it.
;; The 'dynamic-link' call is wrapped in 'delay', so the library is only
;; loaded once the promise is forced; the '(force %lzlib)' call sites in
;; 'lzlib-available?' and 'lzlib-procedure' do that.
;; NOTE(review): the form that names this promise and both arms of the 'if'
;; are not present in this copy of the file.
52 (delay (dynamic-link (if (defined? '%liblz)
(define (lzlib-available?)
  "Return a true value when the lzlib shared library could be loaded, and #f
when trying to load it raised an exception."
  ;; Forcing the promise is what triggers the load; any exception it raises
  ;; is converted to #f.
  (let ((library (false-if-exception (force %lzlib))))
    library))
;; Look NAME up with 'dynamic-func' in the library handle obtained by forcing
;; %lzlib, then wrap the pointer with 'pointer->procedure' using RET as the
;; return type and PARAMETERS as the argument types.  'false-if-exception'
;; turns a failed load or lookup into #f before it reaches 'match'.
;; NOTE(review): the 'match' pattern that binds 'ptr' and any fallback clause
;; are not present in this copy of the file.
60 (define (lzlib-procedure ret name parameters)
61 "Return a procedure corresponding to C function NAME in liblz, or #f if
62 either lzlib or the function could not be found."
63 (match (false-if-exception (dynamic-func name (force %lzlib)))
65 (pointer->procedure ret ptr parameters))
;; Wrapped-pointer type for decoder handles.  Its printer writes the object as
;; "#<lz-decoder ADDR>", ADDR being the object's address in hexadecimal.
;; NOTE(review): the predicate/wrap/unwrap names and the printer's lambda
;; header are not present in this copy; the rest of the file refers to them as
;; 'lz-decoder?', 'pointer->lz-decoder' and 'lz-decoder->pointer'.
69 (define-wrapped-pointer-type <lz-decoder>
70 ;; Scheme counterpart of the 'LZ_Decoder' opaque type.
75 (format port "#<lz-decoder ~a>"
76 (number->string (object-address obj) 16))))
;; Wrapped-pointer type for encoder handles.  Its printer writes the object as
;; "#<lz-encoder ADDR>", ADDR being the object's address in hexadecimal.
;; NOTE(review): the predicate/wrap/unwrap names and the printer's lambda
;; header are not present in this copy; the rest of the file refers to them as
;; 'lz-encoder?', 'pointer->lz-encoder' and 'lz-encoder->pointer'.
78 (define-wrapped-pointer-type <lz-encoder>
79 ;; Scheme counterpart of the 'LZ_Encoder' opaque type.
84 (format port "#<lz-encoder ~a>"
85 (number->string (object-address obj) 16))))
;; Numeric error codes, 0 meaning "no error".
;; NOTE(review): presumably these mirror 'enum LZ_Errno' from lzlib.h in
;; declaration order, i.e. the values returned by 'lz-compress-error' and
;; 'lz-decompress-error' -- confirm against the header.
88 (define %error-number-ok 0)
89 (define %error-number-bad-argument 1)
90 (define %error-number-mem-error 2)
91 (define %error-number-sequence-error 3)
92 (define %error-number-header-error 4)
93 (define %error-number-unexpected-eof 5)
94 (define %error-number-data-error 6)
95 (define %error-number-library-error 7)
98 ;; Compression bindings.
(define lz-compress-open
  (let ((proc (lzlib-procedure '* "LZ_compress_open" (list int int uint64)))
        ;; member-size is an "unsigned long long", and the C standard
        ;; guarantees a minimum range of 0..2^64-1.
        (unlimited-size (- (expt 2 64) 1)))
    (lambda* (dictionary-size match-length-limit
              #:optional (member-size unlimited-size))
      "Initialize the internal stream state for compression and return an
<lz-encoder> object that can only be used as the ENCODER argument of the
other lz-compress procedures.  DICTIONARY-SIZE, MATCH-LENGTH-LIMIT and
MEMBER-SIZE are passed unchanged to LZ_compress_open; MEMBER-SIZE defaults to
the largest value an unsigned 64-bit integer can hold.

Throw to 'lzlib-error with 'lz-compress-open and the lzlib error number when
the encoder could not be allocated or set up.

See the manual: (lzlib) Compression functions."
      (let* ((encoder-ptr (proc dictionary-size match-length-limit member-size))
             ;; LZ_compress_errno yields LZ_ok (0) for a usable encoder and a
             ;; positive error number otherwise, null pointer included.  It
             ;; never yields -1, so the test has to be against "ok".
             (errno (lz-compress-error encoder-ptr)))
        (cond ((= errno %error-number-ok)
               (pointer->lz-encoder encoder-ptr))
              (else
               ;; The manual asks for a failed encoder to be released with
               ;; LZ_compress_close.  Closing may itself fail (e.g. on a null
               ;; pointer); that failure is not what we want to report.
               (false-if-exception
                (lz-compress-close (pointer->lz-encoder encoder-ptr)))
               (throw 'lzlib-error 'lz-compress-open errno)))))))
;; Binding for the C function LZ_compress_close (one pointer argument, int
;; result); the encoder object is unwrapped with 'lz-encoder->pointer' first.
;; The throw passes the raw return value RET as error detail, whereas most
;; sibling bindings pass (lz-compress-error encoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
117 (define lz-compress-close
118 (let ((proc (lzlib-procedure int "LZ_compress_close" '(*))))
120 "Close encoder. ENCODER can no longer be used as an argument to any
121 lz-compress function. "
122 (let ((ret (proc (lz-encoder->pointer encoder))))
124 (throw 'lzlib-error 'lz-compress-close ret)
;; Binding for the C function LZ_compress_finish (one pointer argument, int
;; result).  On the failure path it throws 'lzlib-error carrying the value of
;; (lz-compress-error encoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
127 (define lz-compress-finish
128 (let ((proc (lzlib-procedure int "LZ_compress_finish" '(*))))
130 "Tell that all the data for this member have already been written (with
131 the `lz-compress-write' function). It is safe to call `lz-compress-finish' as
132 many times as needed. After all the produced compressed data have been read
133 with `lz-compress-read' and `lz-compress-member-finished?' returns #t, a new
134 member can be started with 'lz-compress-restart-member'."
135 (let ((ret (proc (lz-encoder->pointer encoder))))
137 (throw 'lzlib-error 'lz-compress-finish (lz-compress-error encoder))
;; Binding for the C function LZ_compress_restart_member: a pointer plus a
;; uint64 MEMBER-SIZE, int result.  On the failure path it throws
;; 'lzlib-error carrying (lz-compress-error encoder).
;; NOTE(review): the test guarding the throw and the success branch are not
;; present in this copy of the file.
140 (define lz-compress-restart-member
141 (let ((proc (lzlib-procedure int "LZ_compress_restart_member" (list '* uint64))))
142 (lambda (encoder member-size)
143 "Start a new member in a multimember data stream.
144 Call this function only after `lz-compress-member-finished?' indicates that the
145 current member has been fully read (with the `lz-compress-read' function)."
146 (let ((ret (proc (lz-encoder->pointer encoder) member-size)))
148 (throw 'lzlib-error 'lz-compress-restart-member
149 (lz-compress-error encoder))
;; Binding for the C function LZ_compress_sync_flush (one pointer argument,
;; int result).  On the failure path it throws 'lzlib-error carrying
;; (lz-compress-error encoder).
;; NOTE(review): the docstring spells the Scheme procedures with a capital
;; "LZ-" twice; the bindings themselves are lower case.  The lambda header,
;; the test guarding the throw and the success branch are not present in this
;; copy of the file.
152 (define lz-compress-sync-flush
153 (let ((proc (lzlib-procedure int "LZ_compress_sync_flush" (list '*))))
155 "Make available to `lz-compress-read' all the data already written with
156 the `LZ-compress-write' function. First call `lz-compress-sync-flush'. Then
157 call 'lz-compress-read' until it returns 0.
159 Repeated use of `LZ-compress-sync-flush' may degrade compression ratio,
160 so use it only when needed. "
161 (let ((ret (proc (lz-encoder->pointer encoder))))
163 (throw 'lzlib-error 'lz-compress-sync-flush
164 (lz-compress-error encoder))
;; Binding for the C function LZ_compress_read (encoder pointer, buffer
;; pointer, int size; int result).  The buffer pointer is taken START bytes
;; into LZFILE-BV; COUNT defaults to the whole length of LZFILE-BV.
;; NOTE(review): the docstring's "uncompressed bytes written, a strictly
;; positive integer" looks copied from 'lz-compress-write'.  This procedure
;; reads from the encoder, and the 'lz-compress-sync-flush' docstring expects
;; it to return 0 eventually -- confirm the wording.
;; NOTE(review): the size argument, the test guarding the throw and the
;; success branch are not present in this copy of the file.
167 (define lz-compress-read
168 (let ((proc (lzlib-procedure int "LZ_compress_read" (list '* '* int))))
169 (lambda* (encoder lzfile-bv #:optional (start 0) (count (bytevector-length lzfile-bv)))
170 "Read up to COUNT bytes from the encoder stream, storing the results in LZFILE-BV.
171 Return the number of uncompressed bytes written, a strictly positive integer."
172 (let ((ret (proc (lz-encoder->pointer encoder)
173 (bytevector->pointer lzfile-bv start)
176 (throw 'lzlib-error 'lz-compress-read (lz-compress-error encoder))
;; Binding for the C function LZ_compress_write (encoder pointer, buffer
;; pointer, int size; int result).  The buffer pointer is taken START bytes
;; into BV; COUNT defaults to the whole length of BV.
;; NOTE(review): the default COUNT ignores START, so a caller passing only
;; START asks for more bytes than remain after that offset -- confirm callers
;; always pass both.  The size argument, the test guarding the throw and the
;; success branch are not present in this copy of the file.
179 (define lz-compress-write
180 (let ((proc (lzlib-procedure int "LZ_compress_write" (list '* '* int))))
181 (lambda* (encoder bv #:optional (start 0) (count (bytevector-length bv)))
182 "Write up to COUNT bytes from BV to the encoder stream. Return the
183 number of uncompressed bytes written, a strictly positive integer."
184 (let ((ret (proc (lz-encoder->pointer encoder)
185 (bytevector->pointer bv start)
188 (throw 'lzlib-error 'lz-compress-write (lz-compress-error encoder))
;; Binding for the C function LZ_compress_write_size (one pointer argument,
;; int result).  On the failure path it throws 'lzlib-error carrying
;; (lz-compress-error encoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
191 (define lz-compress-write-size
192 (let ((proc (lzlib-procedure int "LZ_compress_write_size" '(*))))
194 "The maximum number of bytes that can be immediately written through the
195 `lz-compress-write' function.
197 It is guaranteed that an immediate call to `lz-compress-write' will accept a
198 SIZE up to the returned number of bytes. "
199 (let ((ret (proc (lz-encoder->pointer encoder))))
201 (throw 'lzlib-error 'lz-compress-write-size (lz-compress-error encoder))
;; Binding for the C function LZ_compress_errno (one pointer argument, int
;; result).  A wrapped <lz-encoder> is unwrapped with 'lz-encoder->pointer'
;; before the call; 'lz-compress-open' also calls this with the raw pointer
;; it just received.
;; NOTE(review): the lambda header, the alternative arm of the 'if' and the
;; expression that returns the error number are not present in this copy.
204 (define lz-compress-error
205 (let ((proc (lzlib-procedure int "LZ_compress_errno" '(*))))
207 "ENCODER can be a Scheme object or a pointer."
208 (let* ((error-number (proc (if (lz-encoder? encoder)
209 (lz-encoder->pointer encoder)
;; Binding for the C function LZ_compress_finished (one pointer argument, int
;; result).  The visible catch-all clause throws 'lzlib-error, carrying
;; (lz-compress-error encoder), for any return value not handled earlier.
;; NOTE(review): the lambda header, the 'match' head and the clauses that
;; produce #t and #f are not present in this copy of the file.
213 (define lz-compress-finished?
214 (let ((proc (lzlib-procedure int "LZ_compress_finished" '(*))))
216 "Return #t if all the data have been read and `lz-compress-close' can
217 be safely called. Otherwise return #f."
218 (let ((ret (proc (lz-encoder->pointer encoder))))
222 (_ (throw 'lzlib-error 'lz-compress-finished? (lz-compress-error encoder))))))))
;; Binding for the C function LZ_compress_member_finished (one pointer
;; argument, int result).  The visible catch-all clause throws 'lzlib-error,
;; carrying (lz-compress-error encoder), for any return value not handled
;; earlier.
;; NOTE(review): the lambda header, the 'match' head and the clauses that
;; produce #t and #f are not present in this copy of the file.
224 (define lz-compress-member-finished?
225 (let ((proc (lzlib-procedure int "LZ_compress_member_finished" '(*))))
227 "Return #t if the current member, in a multimember data stream, has
228 been fully read and 'lz-compress-restart-member' can be safely called.
229 Otherwise return #f."
230 (let ((ret (proc (lz-encoder->pointer encoder))))
234 (_ (throw 'lzlib-error 'lz-compress-member-finished? (lz-compress-error encoder))))))))
;; Binding for the C function LZ_compress_data_position (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, the end of the docstring, that test and
;; the success branch are not present in this copy of the file.
236 (define lz-compress-data-position
237 (let ((proc (lzlib-procedure uint64 "LZ_compress_data_position" '(*))))
239 "Return the number of input bytes already compressed in the current
241 (let ((ret (proc (lz-encoder->pointer encoder))))
243 (throw 'lzlib-error 'lz-compress-data-position
244 (lz-compress-error encoder))
;; Binding for the C function LZ_compress_member_position (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
247 (define lz-compress-member-position
248 (let ((proc (lzlib-procedure uint64 "LZ_compress_member_position" '(*))))
250 "Return the number of compressed bytes already produced, but perhaps
251 not yet read, in the current member."
252 (let ((ret (proc (lz-encoder->pointer encoder))))
254 (throw 'lzlib-error 'lz-compress-member-position
255 (lz-compress-error encoder))
;; Binding for the C function LZ_compress_total_in_size (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
258 (define lz-compress-total-in-size
259 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_in_size" '(*))))
261 "Return the total number of input bytes already compressed."
262 (let ((ret (proc (lz-encoder->pointer encoder))))
264 (throw 'lzlib-error 'lz-compress-total-in-size
265 (lz-compress-error encoder))
;; Binding for the C function LZ_compress_total_out_size (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
268 (define lz-compress-total-out-size
269 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_out_size" '(*))))
271 "Return the total number of compressed bytes already produced, but
272 perhaps not yet read."
273 (let ((ret (proc (lz-encoder->pointer encoder))))
275 (throw 'lzlib-error 'lz-compress-total-out-size
276 (lz-compress-error encoder))
280 ;; Decompression bindings.
;; Binding for the C function LZ_decompress_open (no arguments, pointer
;; result).  The raw pointer is handed to 'lz-decompress-error' and, when the
;; check passes, wrapped with 'pointer->lz-decoder'.
;; NOTE(review): the check compares the error number with -1, but every
;; %error-number-* constant in this file is in 0..7 with 0 meaning "ok".  If
;; LZ_decompress_errno only returns those values the throw is unreachable and
;; a failed open goes unnoticed; comparing against %error-number-ok looks
;; like the intended test -- confirm against the lzlib manual.
;; NOTE(review): the lambda header and part of the docstring are not present
;; in this copy of the file.
282 (define lz-decompress-open
283 (let ((proc (lzlib-procedure '* "LZ_decompress_open" '())))
285 "Initializes the internal stream state for decompression and returns a
286 pointer that can only be used as the decoder argument for the other
287 lz-decompress functions, or a null pointer if the decoder could not be
290 See the manual: (lzlib) Decompression functions."
291 (let ((decoder-ptr (proc)))
292 (if (not (= (lz-decompress-error decoder-ptr) -1))
293 (pointer->lz-decoder decoder-ptr)
294 (throw 'lzlib-error 'lz-decompress-open))))))
;; Binding for the C function LZ_decompress_close (one pointer argument, int
;; result); the decoder object is unwrapped with 'lz-decoder->pointer' first.
;; The throw passes the raw return value RET as error detail, whereas most
;; sibling bindings pass (lz-decompress-error decoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
296 (define lz-decompress-close
297 (let ((proc (lzlib-procedure int "LZ_decompress_close" '(*))))
299 "Close decoder. DECODER can no longer be used as an argument to any
300 lz-decompress function. "
301 (let ((ret (proc (lz-decoder->pointer decoder))))
303 (throw 'lzlib-error 'lz-decompress-close ret)
;; Binding for the C function LZ_decompress_finish (one pointer argument, int
;; result).  On the failure path it throws 'lzlib-error carrying
;; (lz-decompress-error decoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
306 (define lz-decompress-finish
307 (let ((proc (lzlib-procedure int "LZ_decompress_finish" '(*))))
309 "Tell that all the data for this stream have already been written (with
310 the `lz-decompress-write' function). It is safe to call
311 `lz-decompress-finish' as many times as needed."
312 (let ((ret (proc (lz-decoder->pointer decoder))))
314 (throw 'lzlib-error 'lz-decompress-finish (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_reset (one pointer argument, int
;; result).  On the failure path it throws 'lzlib-error carrying
;; (lz-decompress-error decoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
317 (define lz-decompress-reset
318 (let ((proc (lzlib-procedure int "LZ_decompress_reset" '(*))))
320 "Reset the internal state of DECODER as it was just after opening it
321 with the `lz-decompress-open' function. Data stored in the internal buffers
322 is discarded. Position counters are set to 0."
323 (let ((ret (proc (lz-decoder->pointer decoder))))
325 (throw 'lzlib-error 'lz-decompress-reset
326 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_sync_to_member (one pointer
;; argument, int result).  On the failure path it throws 'lzlib-error
;; carrying (lz-decompress-error decoder).
;; NOTE(review): the lambda header, the end of the docstring, the test
;; guarding the throw and the success branch are not present in this copy of
;; the file.
329 (define lz-decompress-sync-to-member
330 (let ((proc (lzlib-procedure int "LZ_decompress_sync_to_member" '(*))))
332 "Reset the error state of DECODER and enters a search state that lasts
333 until a new member header (or the end of the stream) is found. After a
334 successful call to `lz-decompress-sync-to-member', data written with
335 `lz-decompress-write' will be consumed and 'lz-decompress-read' will return 0
336 until a header is found.
338 This function is useful to discard any data preceding the first member, or to
339 discard the rest of the current member, for example in case of a data
340 error. If the decoder is already at the beginning of a member, this function
342 (let ((ret (proc (lz-decoder->pointer decoder))))
344 (throw 'lzlib-error 'lz-decompress-sync-to-member
345 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_read (decoder pointer, buffer
;; pointer, int size; int result).  The buffer pointer is taken START bytes
;; into FILE-BV; COUNT defaults to the whole length of FILE-BV.
;; NOTE(review): the docstring says "non-negative positive integer", which is
;; contradictory; "non-negative integer" is presumably meant.  It also says
;; "bytes written" for a read operation.
;; NOTE(review): the size argument, the test guarding the throw and the
;; success branch are not present in this copy of the file.
348 (define lz-decompress-read
349 (let ((proc (lzlib-procedure int "LZ_decompress_read" (list '* '* int))))
350 (lambda* (decoder file-bv #:optional (start 0) (count (bytevector-length file-bv)))
351 "Read up to COUNT bytes from the decoder stream, storing the results in FILE-BV.
352 Return the number of uncompressed bytes written, a non-negative positive integer."
353 (let ((ret (proc (lz-decoder->pointer decoder)
354 (bytevector->pointer file-bv start)
357 (throw 'lzlib-error 'lz-decompress-read (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_write (decoder pointer, buffer
;; pointer, int size; int result).  The buffer pointer is taken START bytes
;; into BV; COUNT defaults to the whole length of BV.
;; NOTE(review): the docstring speaks of "uncompressed bytes written", yet
;; what is written to a decoder is presumably compressed input -- confirm.
;; NOTE(review): the size argument, the test guarding the throw and the
;; success branch are not present in this copy of the file.
360 (define lz-decompress-write
361 (let ((proc (lzlib-procedure int "LZ_decompress_write" (list '* '* int))))
362 (lambda* (decoder bv #:optional (start 0) (count (bytevector-length bv)))
363 "Write up to COUNT bytes from BV to the decoder stream. Return the
364 number of uncompressed bytes written, a non-negative integer."
365 (let ((ret (proc (lz-decoder->pointer decoder)
366 (bytevector->pointer bv start)
369 (throw 'lzlib-error 'lz-decompress-write (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_write_size (one pointer argument,
;; int result).  'lzread!' uses it to cap how much it reads from its file
;; port.  On the failure path it throws 'lzlib-error carrying
;; (lz-decompress-error decoder).
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
372 (define lz-decompress-write-size
373 (let ((proc (lzlib-procedure int "LZ_decompress_write_size" '(*))))
375 "Return the maximum number of bytes that can be immediately written
376 through the `lz-decompress-write' function.
378 It is guaranteed that an immediate call to `lz-decompress-write' will accept a
379 SIZE up to the returned number of bytes. "
380 (let ((ret (proc (lz-decoder->pointer decoder))))
382 (throw 'lzlib-error 'lz-decompress-write-size (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_errno (one pointer argument, int
;; result).  A wrapped <lz-decoder> is unwrapped with 'lz-decoder->pointer'
;; before the call; 'lz-decompress-open' also calls this with the raw pointer
;; it just received.
;; NOTE(review): the lambda header, the alternative arm of the 'if' and the
;; expression that returns the error number are not present in this copy.
385 (define lz-decompress-error
386 (let ((proc (lzlib-procedure int "LZ_decompress_errno" '(*))))
388 "DECODER can be a Scheme object or a pointer."
389 (let* ((error-number (proc (if (lz-decoder? decoder)
390 (lz-decoder->pointer decoder)
;; Binding for the C function LZ_decompress_finished (one pointer argument,
;; int result).  The visible catch-all clause throws 'lzlib-error, carrying
;; (lz-decompress-error decoder), for any return value not handled earlier.
;; NOTE(review): the lambda header, the 'match' head and the clauses that
;; produce #t and #f are not present in this copy of the file.
394 (define lz-decompress-finished?
395 (let ((proc (lzlib-procedure int "LZ_decompress_finished" '(*))))
397 "Return #t if all the data have been read and `lz-decompress-close' can
398 be safely called. Otherwise return #f."
399 (let ((ret (proc (lz-decoder->pointer decoder))))
403 (_ (throw 'lzlib-error 'lz-decompress-finished? (lz-decompress-error decoder))))))))
;; Binding for the C function LZ_decompress_member_finished (one pointer
;; argument, int result).  The visible catch-all clause throws 'lzlib-error,
;; carrying (lz-decompress-error decoder), for any return value not handled
;; earlier.
;; NOTE(review): the docstring mentions `lz-decompress-restart-member', but
;; no binding of that name is visible in this file (only
;; 'lz-compress-restart-member'); the sentence looks copied from the
;; compression side.
;; NOTE(review): the lambda header, the 'match' head and the clauses that
;; produce #t and #f are not present in this copy of the file.
405 (define lz-decompress-member-finished?
406 (let ((proc (lzlib-procedure int "LZ_decompress_member_finished" '(*))))
408 "Return #t if the current member, in a multimember data stream, has
409 been fully read and `lz-decompress-restart-member' can be safely called.
410 Otherwise return #f."
411 (let ((ret (proc (lz-decoder->pointer decoder))))
415 (_ (throw 'lzlib-error 'lz-decompress-member-finished? (lz-decompress-error decoder))))))))
;; Binding for the C function LZ_decompress_member_version (one pointer
;; argument, int result).
;; NOTE(review): the throw is tagged 'lz-decompress-data-position although
;; this procedure is 'lz-decompress-member-version' -- looks like a
;; copy/paste slip that mislabels the error.
;; NOTE(review): the descriptive string comes after the 'let' header, so it
;; is an ordinary expression in the 'let' body rather than the procedure's
;; docstring; it presumably belongs right after the lambda header.
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
417 (define lz-decompress-member-version
418 (let ((proc (lzlib-procedure int "LZ_decompress_member_version" '(*))))
420 (let ((ret (proc (lz-decoder->pointer decoder))))
421 "Return the version of current member from member header."
423 (throw 'lzlib-error 'lz-decompress-data-position
424 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_dictionary_size (one pointer
;; argument, int result).
;; NOTE(review): the throw is tagged 'lz-decompress-member-position although
;; this procedure is 'lz-decompress-dictionary-size' -- looks like a
;; copy/paste slip that mislabels the error.
;; NOTE(review): the descriptive string comes after the 'let' header, so it
;; is an ordinary expression in the 'let' body rather than the procedure's
;; docstring; it presumably belongs right after the lambda header.
;; NOTE(review): the lambda header, the test guarding the throw and the
;; success branch are not present in this copy of the file.
427 (define lz-decompress-dictionary-size
428 (let ((proc (lzlib-procedure int "LZ_decompress_dictionary_size" '(*))))
430 (let ((ret (proc (lz-decoder->pointer decoder))))
431 "Return the dictionary size of current member from member header."
433 (throw 'lzlib-error 'lz-decompress-member-position
434 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_data_crc (one pointer argument,
;; unsigned-int result).
;; NOTE(review): the throw is tagged 'lz-decompress-member-position although
;; this procedure is 'lz-decompress-data-crc' -- looks like a copy/paste slip
;; that mislabels the error.
;; NOTE(review): the result type is unsigned, so RET can never be negative;
;; check that the test guarding the throw (not visible here) does not rely on
;; a negative sentinel.
;; NOTE(review): the descriptive string comes after the 'let' header, so it
;; is an ordinary expression in the 'let' body rather than the procedure's
;; docstring.  It also names `lz-decompress-member-finished' without the
;; trailing "?" the binding carries.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
437 (define lz-decompress-data-crc
438 (let ((proc (lzlib-procedure unsigned-int "LZ_decompress_data_crc" '(*))))
440 (let ((ret (proc (lz-decoder->pointer decoder))))
441 "Return the 32 bit Cyclic Redundancy Check of the data decompressed
442 from the current member. The returned value is valid only when
443 `lz-decompress-member-finished' returns #t. "
445 (throw 'lzlib-error 'lz-decompress-member-position
446 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_data_position (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
449 (define lz-decompress-data-position
450 (let ((proc (lzlib-procedure uint64 "LZ_decompress_data_position" '(*))))
452 "Return the number of decompressed bytes already produced, but perhaps
453 not yet read, in the current member."
454 (let ((ret (proc (lz-decoder->pointer decoder))))
456 (throw 'lzlib-error 'lz-decompress-data-position
457 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_member_position (one pointer
;; argument, uint64 result).
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel, otherwise the throw is unreachable.
;; NOTE(review): the lambda header, the end of the docstring, that test and
;; the success branch are not present in this copy of the file.
460 (define lz-decompress-member-position
461 (let ((proc (lzlib-procedure uint64 "LZ_decompress_member_position" '(*))))
463 "Return the number of input bytes already decompressed in the current
465 (let ((ret (proc (lz-decoder->pointer decoder))))
467 (throw 'lzlib-error 'lz-decompress-member-position
468 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_total_in_size (one pointer
;; argument, uint64 result).
;; NOTE(review): the descriptive string comes after the 'let' header, so it
;; is an ordinary expression in the 'let' body rather than the procedure's
;; docstring.  Its wording ("already compressed") looks copied from the
;; encoder side; for a decoder "decompressed" is presumably meant.
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
471 (define lz-decompress-total-in-size
472 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_in_size" '(*))))
474 (let ((ret (proc (lz-decoder->pointer decoder))))
475 "Return the total number of input bytes already compressed."
477 (throw 'lzlib-error 'lz-decompress-total-in-size
478 (lz-decompress-error decoder))
;; Binding for the C function LZ_decompress_total_out_size (one pointer
;; argument, uint64 result).
;; NOTE(review): the descriptive string comes after the 'let' header, so it
;; is an ordinary expression in the 'let' body rather than the procedure's
;; docstring.  Its wording ("compressed bytes already produced") looks
;; copied from the encoder side; a decoder presumably produces decompressed
;; bytes.
;; NOTE(review): with a uint64 return type RET can never be negative; check
;; that the test guarding the throw (not visible here) does not rely on a
;; negative sentinel.
;; NOTE(review): the lambda header, that test and the success branch are not
;; present in this copy of the file.
481 (define lz-decompress-total-out-size
482 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_out_size" '(*))))
484 (let ((ret (proc (lz-decoder->pointer decoder))))
485 "Return the total number of compressed bytes already produced, but
486 perhaps not yet read."
488 (throw 'lzlib-error 'lz-decompress-total-out-size
489 (lz-decompress-error decoder))
493 ;; High level functions.
;; Feed DECODER from FILE-PORT, then pull decoded bytes into BV.
;; Visible steps: (1) COUNT is capped at (lz-decompress-write-size decoder);
;; (2) up to COUNT bytes are fetched from FILE-PORT and, unless the port is
;; at end of file, written to the decoder; (3) while START is inside BV,
;; 'lz-decompress-read' fills BV from START, and both START and the running
;; total 'read' advance by the amount obtained.
;; NOTE(review): step (3) asks for (- (bytevector-length bv) start) bytes
;; rather than COUNT, so more than COUNT bytes may be stored when BV is longer
;; than START + COUNT -- confirm this is acceptable to the custom-port caller.
;; NOTE(review): the bindings of 'rd' and 'read', the loop head and the
;; return expression are not present in this copy of the file.
494 (define* (lzread! decoder file-port bv
495 #:optional (start 0) (count (bytevector-length bv)))
496 "Read up to COUNT bytes from FILE-PORT into BV at offset START. Return the
497 number of uncompressed bytes actually read; it is zero if COUNT is zero or if
498 the end-of-stream has been reached."
499 ;; WARNING: Because we don't alternate between lz-reads and lz-writes, we can't
500 ;; process more than lz-decompress-write-size from the file-port.
501 (when (> count (lz-decompress-write-size decoder))
502 (set! count (lz-decompress-write-size decoder)))
503 (let ((file-bv (get-bytevector-n file-port count)))
504 (unless (eof-object? file-bv)
505 (lz-decompress-write decoder file-bv 0 (bytevector-length file-bv))))
508 (if (< start (bytevector-length bv))
510 (set! rd (lz-decompress-read decoder bv start (- (bytevector-length bv) start)))
511 (set! start (+ start rd))
512 (set! read (+ read rd)))
;; Push bytes of BV into ENCODER, then copy whatever the encoder has produced
;; to LZ-PORT.
;; Visible steps: (1) a 'while' loop calls 'lz-compress-write' from offset
;; (+ start written) for the remaining (- count written) bytes as long as the
;; encoder reports a positive write size; (2) 'lz-compress-finish' is called
;; under a condition that is not visible here; (3) a scratch bytevector of
;; WRITTEN bytes receives 'lz-compress-read' output, which is forwarded to
;; LZ-PORT with 'put-bytevector' while 'read' accumulates the byte count.
;; NOTE(review): the bindings of 'written', 'read' and 'rd', the second
;; 'while' condition, the guard around 'lz-compress-finish', the drain loop's
;; head and exit test, and the return expression are not present in this copy
;; of the file.
518 (define* (lzwrite encoder bv lz-port
519 #:optional (start 0) (count (bytevector-length bv)))
520 "Write up to COUNT bytes from BV at offset START into LZ-PORT. Return
521 the number of uncompressed bytes written, a non-negative integer."
524 (while (and (< 0 (lz-compress-write-size encoder))
526 (set! written (+ written
527 (lz-compress-write encoder bv (+ start written) (- count written)))))
529 (lz-compress-finish encoder))
530 (let ((lz-bv (make-bytevector written)))
532 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
533 (put-bytevector lz-port lz-bv 0 rd)
534 (set! read (+ read rd))
537 ;; `written' is the total byte count of uncompressed data.
545 ;; Alist of (levels (dictionary-size match-length-limit)). 0 is the fastest.
546 ;; See bbexample.c in lzlib's source.
;; Each dictionary size is spelled as a left shift, e.g. 1 << 20 for level 1.
;; 'make-lzip-output-port' looks a level up with 'assoc-ref', takes the 'car'
;; of the result and applies 'lz-compress-open' to that two-element list.
;; NOTE(review): the opening of the quasiquoted list, and with it the entry
;; for level 0 announced above, is not present in this copy of the file.
547 (define %compression-levels
549 (1 (,(bitwise-arithmetic-shift-left 1 20) 5))
550 (2 (,(bitwise-arithmetic-shift-left 3 19) 6))
551 (3 (,(bitwise-arithmetic-shift-left 1 21) 8))
552 (4 (,(bitwise-arithmetic-shift-left 3 20) 12))
553 (5 (,(bitwise-arithmetic-shift-left 1 22) 20))
554 (6 (,(bitwise-arithmetic-shift-left 1 23) 36))
555 (7 (,(bitwise-arithmetic-shift-left 1 24) 68))
556 (8 (,(bitwise-arithmetic-shift-left 3 23) 132))
557 (9 (,(bitwise-arithmetic-shift-left 1 25) 273))))
;; Level used by 'make-lzip-output-port' and 'call-with-lzip-output-port'
;; when the caller does not pass LEVEL.
;; NOTE(review): the value itself is not present in this copy of the file.
559 (define %default-compression-level
;; Open a fresh decoder and expose it as a custom binary input port named
;; "lzip-input".  The port's read! callback delegates to 'lzread!' with PORT
;; as the compressed source; the close procedure calls 'lz-decompress-close'
;; on the decoder.  Position getter and setter are #f, so the port is not
;; seekable.
;; NOTE(review): the head of the close procedure and the code that closes
;; PORT (promised by the docstring) are not present in this copy of the file.
562 (define* (make-lzip-input-port port)
563 "Return an input port that decompresses data read from PORT, a file port.
564 PORT is automatically closed when the resulting port is closed."
565 (define decoder (lz-decompress-open))
567 (define (read! bv start count)
568 (lzread! decoder port bv start count))
570 (make-custom-binary-input-port "lzip-input" read! #f #f
572 (lz-decompress-close decoder)
;; Open an encoder configured from the %compression-levels entry for LEVEL
;; and expose it as a custom binary output port named "lzip-output".  The
;; port's write! callback delegates to 'lzwrite' with PORT as the sink.  On
;; close, the visible code calls 'lz-compress-finish', copies the remaining
;; encoder output to PORT through a 64 KiB scratch buffer, then calls
;; 'lz-compress-close'.
;; NOTE(review): a LEVEL absent from %compression-levels makes 'assoc-ref'
;; return #f, and 'car' then fails with a type error unrelated to lzip --
;; consider validating LEVEL.
;; NOTE(review): the '#:key' marker, the end of the docstring, the head of the
;; close procedure, the drain loop's head and exit test, and the code that
;; closes PORT are not present in this copy of the file.
575 (define* (make-lzip-output-port port
577 (level %default-compression-level))
578 "Return an output port that compresses data at the given LEVEL, using PORT,
579 a file port, as its sink. PORT is automatically closed when the resulting
581 (define encoder (apply lz-compress-open
582 (car (assoc-ref %compression-levels level))))
584 (define (write! bv start count)
585 (lzwrite encoder bv port start count))
587 (make-custom-binary-output-port "lzip-output" write! #f #f
589 (lz-compress-finish encoder)
590 ;; "lz-read" the trailing metadata added by `lz-compress-finish'.
591 (let ((lz-bv (make-bytevector (* 64 1024))))
593 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
594 (put-bytevector port lz-bv 0 rd)
597 (lz-compress-close encoder)
;; Wrap PORT with 'make-lzip-input-port', bind the result to 'lzip', and
;; close that wrapper with 'close-port' at the end.
;; NOTE(review): the code that actually calls PROC, and whatever construct
;; ensures 'close-port' runs, are not present in this copy of the file.
600 (define* (call-with-lzip-input-port port proc)
601 "Call PROC with a port that wraps PORT and decompresses data read from it.
602 PORT is closed upon completion."
603 (let ((lzip (make-lzip-input-port port)))
609 (close-port lzip)))))
;; Wrap PORT with 'make-lzip-output-port', bind the result to 'lzip', and
;; close that wrapper with 'close-port' at the end.  LEVEL defaults to
;; %default-compression-level.
;; NOTE(review): docstring typo -- "PORT is close upon completion" should
;; read "closed".
;; NOTE(review): the '#:key' marker, the remaining arguments of the
;; 'make-lzip-output-port' call, the code that calls PROC, and whatever
;; construct ensures 'close-port' runs are not present in this copy.
611 (define* (call-with-lzip-output-port port proc
613 (level %default-compression-level))
614 "Call PROC with an output port that wraps PORT and compresses data. PORT is
615 close upon completion."
616 (let ((lzip (make-lzip-output-port port
623 (close-port lzip)))))
625 ;;; lzlib.scm ends here