Docs: Language fix to 01_compress_easy.c.
[xz/debian.git] / doc / examples / 01_compress_easy.c
blobe6dd2b0cda31d3772d62bd379fab9cacc492aabf
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file 01_compress_easy.c
4 /// \brief Compress from stdin to stdout in multi-call mode
5 ///
6 /// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE
7 ///
8 /// Example: ./01_compress_easy 6 < foo > foo.xz
9 //
10 // Author: Lasse Collin
12 // This file has been put into the public domain.
13 // You can do whatever you want with this file.
15 ///////////////////////////////////////////////////////////////////////////////
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <lzma.h>
// Print a short usage message to stderr and terminate the program with
// EXIT_FAILURE. This function does not return.
//
// argv0 is the program name that is substituted into the usage line.
static void
show_usage_and_exit(const char *argv0)
{
	fprintf(stderr,
			"Usage: %s PRESET < INFILE > OUTFILE\n"
			"PRESET is a number 0-9 and can optionally be followed "
			"by `e' to indicate extreme preset\n",
			argv0);

	exit(EXIT_FAILURE);
}
36 static uint32_t
37 get_preset(int argc, char **argv)
39 // One argument whose first char must be 0-9.
40 if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
41 show_usage_and_exit(argv[0]);
43 // Calculate the preste level 0-9.
44 uint32_t preset = argv[1][0] - '0';
46 // If there is a second char, it must be 'e'. It will set
47 // the LZMA_PRESET_EXTREME flag.
48 if (argv[1][1] != '\0') {
49 if (argv[1][1] != 'e' || argv[1][2] != '\0')
50 show_usage_and_exit(argv[0]);
52 preset |= LZMA_PRESET_EXTREME;
55 return preset;
59 static bool
60 init_encoder(lzma_stream *strm, uint32_t preset)
62 // Initialize the encoder using a preset. Set the integrity to check
63 // to CRC64, which is the default in the xz command line tool. If
64 // the .xz file needs to be decompressed with XZ Embedded, use
65 // LZMA_CHECK_CRC32 instead.
66 lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
68 // Return successfully if the initialization went fine.
69 if (ret == LZMA_OK)
70 return true;
72 // Something went wrong. The possible errors are documented in
73 // lzma/container.h (src/liblzma/api/lzma/container.h in the source
74 // package or e.g. /usr/include/lzma/container.h depending on the
75 // install prefix).
76 const char *msg;
77 switch (ret) {
78 case LZMA_MEM_ERROR:
79 msg = "Memory allocation failed";
80 break;
82 case LZMA_OPTIONS_ERROR:
83 msg = "Specified preset is not supported";
84 break;
86 case LZMA_UNSUPPORTED_CHECK:
87 msg = "Specified integrity check is not supported";
88 break;
90 default:
91 // This is most likely LZMA_PROG_ERROR indicating a bug in
92 // this program or in liblzma. It is inconvenient to have a
93 // separate error message for errors that should be impossible
94 // to occur, but knowing the error code is important for
95 // debugging. That's why it is good to print the error code
96 // at least when there is no good error message to show.
97 msg = "Unknown error, possibly a bug";
98 break;
101 fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
102 msg, ret);
103 return false;
107 static bool
108 compress(lzma_stream *strm, FILE *infile, FILE *outfile)
110 // This will be LZMA_RUN until the end of the input file is reached.
111 // This tells lzma_code() when there will be no more input.
112 lzma_action action = LZMA_RUN;
114 // Buffers to temporarily hold uncompressed input
115 // and compressed output.
116 uint8_t inbuf[BUFSIZ];
117 uint8_t outbuf[BUFSIZ];
119 // Initialize the input and output pointers. Initializing next_in
120 // and avail_in isn't really necessary when we are going to encode
121 // just one file since LZMA_STREAM_INIT takes care of initializing
122 // those already. But it doesn't hurt much and it will be needed
123 // if encoding more than one file like we will in 02_decompress.c.
125 // While we don't care about strm->total_in or strm->total_out in this
126 // example, it is worth noting that initializing the encoder will
127 // always reset total_in and total_out to zero. But the encoder
128 // initialization doesn't touch next_in, avail_in, next_out, or
129 // avail_out.
130 strm->next_in = NULL;
131 strm->avail_in = 0;
132 strm->next_out = outbuf;
133 strm->avail_out = sizeof(outbuf);
135 // Loop until the file has been successfully compressed or until
136 // an error occurs.
137 while (true) {
138 // Fill the input buffer if it is empty.
139 if (strm->avail_in == 0 && !feof(infile)) {
140 strm->next_in = inbuf;
141 strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
142 infile);
144 if (ferror(infile)) {
145 fprintf(stderr, "Read error: %s\n",
146 strerror(errno));
147 return false;
150 // Once the end of the input file has been reached,
151 // we need to tell lzma_code() that no more input
152 // will be coming and that it should finish the
153 // encoding.
154 if (feof(infile))
155 action = LZMA_FINISH;
158 // Tell liblzma do the actual encoding.
160 // This reads up to strm->avail_in bytes of input starting
161 // from strm->next_in. avail_in will be decremented and
162 // next_in incremented by an equal amount to match the
163 // number of input bytes consumed.
165 // Up to strm->avail_out bytes of compressed output will be
166 // written starting from strm->next_out. avail_out and next_out
167 // will be incremented by an equal amount to match the number
168 // of output bytes written.
170 // The encoder has to do internal buffering, which means that
171 // it may take quite a bit of input before the same data is
172 // available in compressed form in the output buffer.
173 lzma_ret ret = lzma_code(strm, action);
175 // If the output buffer is full or if the compression finished
176 // successfully, write the data from the output bufffer to
177 // the output file.
178 if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
179 // When lzma_code() has returned LZMA_STREAM_END,
180 // the output buffer is likely to be only partially
181 // full. Calculate how much new data there is to
182 // be written to the output file.
183 size_t write_size = sizeof(outbuf) - strm->avail_out;
185 if (fwrite(outbuf, 1, write_size, outfile)
186 != write_size) {
187 fprintf(stderr, "Write error: %s\n",
188 strerror(errno));
189 return false;
192 // Reset next_out and avail_out.
193 strm->next_out = outbuf;
194 strm->avail_out = sizeof(outbuf);
197 // Normally the return value of lzma_code() will be LZMA_OK
198 // until everything has been encoded.
199 if (ret != LZMA_OK) {
200 // Once everything has been encoded successfully, the
201 // return value of lzma_code() will be LZMA_STREAM_END.
203 // It is important to check for LZMA_STREAM_END. Do not
204 // assume that getting ret != LZMA_OK would mean that
205 // everything has gone well.
206 if (ret == LZMA_STREAM_END)
207 return true;
209 // It's not LZMA_OK nor LZMA_STREAM_END,
210 // so it must be an error code. See lzma/base.h
211 // (src/liblzma/api/lzma/base.h in the source package
212 // or e.g. /usr/include/lzma/base.h depending on the
213 // install prefix) for the list and documentation of
214 // possible values. Most values listen in lzma_ret
215 // enumeration aren't possible in this example.
216 const char *msg;
217 switch (ret) {
218 case LZMA_MEM_ERROR:
219 msg = "Memory allocation failed";
220 break;
222 case LZMA_DATA_ERROR:
223 // This error is returned if the compressed
224 // or uncompressed size get near 8 EiB
225 // (2^63 bytes) because that's where the .xz
226 // file format size limits currently are.
227 // That is, the possibility of this error
228 // is mostly theoretical unless you are doing
229 // something very unusual.
231 // Note that strm->total_in and strm->total_out
232 // have nothing to do with this error. Changing
233 // those variables won't increase or decrease
234 // the chance of getting this error.
235 msg = "File size limits exceeded";
236 break;
238 default:
239 // This is most likely LZMA_PROG_ERROR, but
240 // if this program is buggy (or liblzma has
241 // a bug), it may be e.g. LZMA_BUF_ERROR or
242 // LZMA_OPTIONS_ERROR too.
244 // It is inconvenient to have a separate
245 // error message for errors that should be
246 // impossible to occur, but knowing the error
247 // code is important for debugging. That's why
248 // it is good to print the error code at least
249 // when there is no good error message to show.
250 msg = "Unknown error, possibly a bug";
251 break;
254 fprintf(stderr, "Encoder error: %s (error code %u)\n",
255 msg, ret);
256 return false;
262 extern int
263 main(int argc, char **argv)
265 // Get the preset number from the command line.
266 uint32_t preset = get_preset(argc, argv);
268 // Initialize a lzma_stream structure. When it is allocated on stack,
269 // it is simplest to use LZMA_STREAM_INIT macro like below. When it
270 // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
271 // works (as long as NULL pointers are represented with zero bits
272 // as they are on practically all computers today).
273 lzma_stream strm = LZMA_STREAM_INIT;
275 // Initialize the encoder. If it succeeds, compress from
276 // stdin to stdout.
277 bool success = init_encoder(&strm, preset);
278 if (success)
279 success = compress(&strm, stdin, stdout);
281 // Free the memory allocated for the encoder. If we were encoding
282 // multiple files, this would only need to be done after the last
283 // file. See 02_decompress.c for handling of multiple files.
285 // It is OK to call lzma_end() multiple times or when it hasn't been
286 // actually used except initialized with LZMA_STREAM_INIT.
287 lzma_end(&strm);
289 // Close stdout to catch possible write errors that can occur
290 // when pending data is flushed from the stdio buffers.
291 if (fclose(stdout)) {
292 fprintf(stderr, "Write error: %s\n", strerror(errno));
293 success = false;
296 return success ? EXIT_SUCCESS : EXIT_FAILURE;