files: reap file_description_prog children
[rb-79.git] / sanitize-file.c
blob699a35dc87cc60ec390834ffdc7ba46c9f2906c0
/*
 * Copyright (c) 2017, De Rais <derais@cock.li>
 *
 * Permission to use, copy, modify, and/or distribute this software for
 * any purpose with or without fee is hereby granted, provided that the
 * above copyright notice and this permission notice appear in all
 * copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
18 #include <errno.h>
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <time.h>
26 #include <unistd.h>
28 #include <magic.h>
30 #include "macros.h"
31 #include "rb79.h"
33 static magic_t mime_cookie;
34 static char *temp_dir;
36 /* Global configuration */
37 const struct configuration *conf;
39 /* A buffer this long can hold any static_thumbnail path */
40 static size_t max_static_thumb_len;
43 * Set up libmagic and all its friends
45 * Preconditions:
47 * - setup_sanitize_file() was not invoked more recently than
48 * clean_sanitize_file().
50 * Postconditions (success):
52 * - Any other function in this file may be safely called.
54 int setup_sanitize_file(const struct configuration *in_conf)
56 conf = in_conf;
57 max_static_thumb_len = 1;
59 for (size_t j = 0; j < conf->filetypes_num; j++) {
60 const struct filetype *f = &conf->filetypes[j];
62 if (f->static_thumbnail) {
63 size_t t = strlen(f->static_thumbnail);
65 if (t + 1 > max_static_thumb_len) {
66 max_static_thumb_len = t + 1;
71 if (!(mime_cookie = magic_open(MAGIC_MIME_TYPE))) {
72 ERROR_MESSAGE("magic_open(): %s", magic_error(mime_cookie));
74 return -1;
77 if (magic_load(mime_cookie, 0) < 0) {
78 ERROR_MESSAGE("magic_open(): %s", magic_error(mime_cookie));
80 return -1;
83 if (!(temp_dir = malloc(strlen("/tmp/rb79_conv_XXXXXX") + 1))) {
84 return -1;
87 sprintf(temp_dir, "/tmp/rb79_conv_XXXXXX");
89 if (!(mkdtemp(temp_dir))) {
90 PERROR_MESSAGE("mkdtemp");
92 return -1;
95 return 0;
99 * Check the MIME type of the content of buf
101 * Preconditions:
103 * - setup_sanitize_file() was invoked more recently than
104 * clean_sanitize_file().
106 * - buf is memory of length len.
108 * - out_filetype is not 0.
110 * - Overwriting *out_filetype shall not cause a memory leak.
112 * Postconditions (success):
114 * - The contents of buf have been examined by libmagic. If the
115 * MIME type corresponds to an entry in allowed_filetypes,
116 * *out_filetype is that entry. If not, *out_filetype is 0.
118 int sf_check_mime_type(const char *buf, size_t len, const struct
119 filetype **out_filetype)
121 const char *mime = 0;
123 if (!buf ||
124 !len) {
125 *out_filetype = 0;
127 return 0;
130 if (!(mime = magic_buffer(mime_cookie, buf, len))) {
131 ERROR_MESSAGE("magic_buffer(): %s", magic_error(mime_cookie));
133 return -1;
136 for (size_t j = 0; j < conf->filetypes_num; ++j) {
137 if (!strcmp(mime, conf->filetypes[j].mime_type)) {
138 *out_filetype = &conf->filetypes[j];
140 return 0;
144 LOG("The mime type \"%s\" is unsupported", mime);
146 return -1;
150 * Run file_description_prog, store the result in out_description
152 * It is intended that this be called lazily by write-thread, because
153 * the standard posting path (at present) inserts into the database
154 * before saving files (to reduce error paths).
156 * Preconditions:
158 * - setup_sanitize_file() was invoked more recently than
159 * clean_sanitize_file().
161 * - mimetype is a string of length mimetype_len, and filepath is
162 * a string of length filepath_len.
164 * - filepath is the absolute path to the file in question, so is
165 * something like "/var/www/rb79/m/src/1942067545.jpg".
167 * - The mimetype of that file is mimetype (this should be satisfied
168 * if both strings come from a db row), so is something like
169 * "image/jpeg".
171 * - out_description and out_len are not 0.
173 * - Overwriting *out_description shall not cause a memory leak.
175 * Postconditions (success):
177 * - *out_description is a string of length *out_len, and is
178 * suitable for inclusion directly into HTML.
180 int sf_describe_file(const char *mimetype, const char *filepath,
181 char **out_description, size_t *out_description_len)
183 /* XXX: do we need to do any signal magic here? */
184 int ret = -1;
185 int fds[2];
186 pid_t fret = 0;
187 ssize_t rret = 0;
188 size_t pos = 0;
189 size_t dlen = 128;
191 if (!filepath ||
192 !mimetype) {
193 ret = 0;
194 goto done;
197 if (pipe(fds) < 0) {
198 PERROR_MESSAGE("pipe");
199 goto done;
202 /* XXX: should we have an automagically growing buffer? */
203 if (!(*out_description = calloc(dlen, sizeof **out_description))) {
204 PERROR_MESSAGE("malloc");
205 goto done;
208 if ((fret = fork()) == -1) {
209 PERROR_MESSAGE("fork");
210 goto done;
213 if (!fret) {
214 /* We are child: ``1>fds[1] 2>&1'' */
215 close(fds[0]);
216 dup2(fds[1], STDOUT_FILENO);
217 dup2(fds[1], STDERR_FILENO);
218 execlp(conf->file_description_prog, conf->file_description_prog,
219 mimetype, filepath, (const char *) 0);
221 /* An error has occured but we dare not log */
222 _exit(0);
225 /* We are parent */
226 close(fds[1]);
228 do {
229 rret = read(fds[0], *out_description + pos, dlen - pos - 1);
231 if (rret == -1) {
232 close(fds[0]);
233 PERROR_MESSAGE("read");
234 goto done;
237 pos += rret;
239 if (pos >= dlen - 1) {
240 LOG("$(%s %s %s) is too long, aborting",
241 conf->file_description_prog, mimetype, filepath);
242 break;
244 } while (rret);
246 /* If file_description_prog is rampant, this blocks. */
247 waitpid(fret, 0, 0);
248 (*out_description)[pos] = '\0';
249 *out_description_len = pos;
250 close(fds[0]);
251 ret = 0;
252 done:
254 return ret;
258 * Treat the contents of buf as a file. Run board.install_command
259 * and board.thumb_creation_command (if relevant) to place it at
260 * some place like /var/www/rb79/sf/src/2684425523.png. Record that
261 * full path in *out_abs_path. Record a truncated version, suitable
262 * for use in <a>, into *out_path. Record the thumbnail's path (also
263 * suitable for <a>) in *out_thumb_path.
265 * Preconditions:
267 * - setup_sanitize_file() was invoked more recently than
268 * clean_sanitize_file().
270 * - board_idx represents a board, AND THE LOCK IS HELD.
272 * - buf is memory of length len, and when treated as a file has
273 * MIME type corresponding to *filetype.
275 * - now, out_abs_path, out_path, out_thumb_path, our_fault are
276 * not 0.
278 * - *now does not correspond to a timestamp of any other file
279 * (otherwise the filenames will clobber).
281 * - Overwriting *out_abs_path, *out_path, and *out_thumb_path
282 * shall not cause a memory leak.
284 * Postconditions (success):
286 * - *out_abs_path is an absolute path, like
287 * "/var/www/rb79/sf/src/12343252.jpg".
289 * - *out_path is a path suitable for use in <a>, like
290 * "/sf/src/12343242.jpg".
292 * - *out_thumb_path is also a path suitable for use in <a>.
294 * - *out_path_len and *out_thumb_path_len are the relevant lengths
295 * (note there is no out length for *out_abs_path.
297 * - buf was written to disk, and filetype's .install_command was
298 * run with that file as the first argument and *out_abs_path
299 * as the second argument.
301 * - If filetype.thumb_creation_command is not 0, it was run with
302 * that file as the first argument and the file corresponding
303 * to *out_thumb_path as the second.
305 * - Otherwise, *out_thumb_path is a copy of filetype.static_thumbnail.
307 int sf_install_files(size_t board_idx, const char *buf, size_t len, time_t *now,
308 const struct filetype *filetype, char **out_abs_path,
309 char **out_path,
310 size_t *out_path_len, char **out_thumb_path,
311 size_t *out_thumb_path_len,
312 int *our_fault)
314 int ret = -1;
315 size_t abs_path_len = snprintf(0, 0, "%s/%s/src/%ju.%s",
316 conf->static_www_folder,
317 conf->boards[board_idx].name,
318 (uintmax_t) -1,
319 filetype->ext);
320 size_t thumb_path_len = snprintf(0, 0, "%s/%s/src/%jus.png",
321 conf->static_www_folder,
322 conf->boards[board_idx].name,
323 (uintmax_t) -1);
324 size_t temp_path_len = snprintf(0, 0, "%s/%ju.%s", temp_dir,
325 (uintmax_t) -1, filetype->ext);
327 if (max_static_thumb_len > thumb_path_len) {
328 thumb_path_len = max_static_thumb_len;
331 char *abs_path = 0;
332 char *system_path = 0;
333 char *thumb_path = 0;
334 char *temp_path = 0;
335 size_t thumb_cmd_len = 0;
336 char *thumb_cmd = 0;
337 size_t full_cmd_len = 0;
338 char *full_cmd = 0;
339 FILE *out = 0;
341 if (!(abs_path = malloc(abs_path_len + 1))) {
342 PERROR_MESSAGE("malloc");
343 *our_fault = 1;
344 goto done;
347 if (!(system_path = malloc(abs_path_len + 1))) {
348 PERROR_MESSAGE("malloc");
349 *our_fault = 1;
350 goto done;
353 if (!(thumb_path = malloc(thumb_path_len + 1))) {
354 PERROR_MESSAGE("malloc");
355 *our_fault = 1;
356 goto done;
359 if (!(temp_path = malloc(temp_path_len + 1))) {
360 PERROR_MESSAGE("malloc");
361 *our_fault = 1;
362 goto done;
365 while (1) {
366 FILE *f = 0;
368 sprintf(abs_path, "%s/%s/src/%ju.%s", conf->static_www_folder,
369 conf->boards[board_idx].name, (uintmax_t) *now,
370 filetype->ext);
371 f = fopen(abs_path, "r");
373 if (f) {
375 * Since we have the filesystem lock, this
376 * should correctly avoid file clobbering.
378 fclose(f);
379 (*now)++;
380 continue;
383 if (errno == ENOENT) {
384 break;
387 PERROR_MESSAGE("fopen");
388 *our_fault = 1;
389 goto done;
392 /* At this point, *now and abs_path are correct */
393 sprintf(thumb_path, "%s/%s/src/%jus.png", conf->static_www_folder,
394 conf->boards[board_idx].name, (uintmax_t) *now);
395 sprintf(temp_path, "%s/%ju.%s", temp_dir, (uintmax_t) *now,
396 filetype->ext);
398 if (!(out = fopen(temp_path, "w"))) {
399 PERROR_MESSAGE("fopen");
400 *our_fault = 1;
401 goto done;
404 if (fwrite(buf, 1, len, out) < len) {
405 PERROR_MESSAGE("fwrite");
406 *our_fault = 1;
407 goto done;
410 fclose(out);
412 /* Now we have something on the filesystem */
413 if (filetype->thumb_creation_command) {
414 thumb_cmd_len = snprintf(0, 0, filetype->thumb_creation_command,
415 temp_path, thumb_path);
417 if (!(thumb_cmd = malloc(thumb_cmd_len + 1))) {
418 PERROR_MESSAGE("malloc");
419 *our_fault = 1;
420 goto done;
423 sprintf(thumb_cmd, filetype->thumb_creation_command, temp_path,
424 thumb_path);
425 int tc_ret = system(thumb_cmd);
427 if (!WIFEXITED(tc_ret)) {
428 ERROR_MESSAGE(
429 "Thumnail cmd \u00ab%s\u00bb did not exit",
430 thumb_cmd);
431 LOG("Thumnail cmd \u00ab%s\u00bb did not exit",
432 thumb_cmd);
433 goto done;
434 } else if (WEXITSTATUS(tc_ret)) {
435 LOG("Thumnail cmd \u00ab%s\u00bb exited %d", thumb_cmd,
436 (int) WEXITSTATUS(tc_ret));
437 goto done;
439 } else if (filetype->static_thumbnail) {
440 sprintf(thumb_path, "%s", filetype->static_thumbnail);
443 full_cmd_len = snprintf(0, 0, filetype->install_command, temp_path,
444 abs_path);
446 if (!(full_cmd = malloc(full_cmd_len + 1))) {
447 PERROR_MESSAGE("malloc");
448 *our_fault = 1;
449 goto done;
452 sprintf(full_cmd, filetype->install_command, temp_path, abs_path);
453 int fc_ret = system(full_cmd);
455 if (!WIFEXITED(fc_ret)) {
456 ERROR_MESSAGE("Install cmd \u00ab%s\u00bb did not exit",
457 full_cmd);
458 LOG("Install cmd \u00ab%s\u00bb did not exit", full_cmd);
459 goto done;
460 } else if (WEXITSTATUS(fc_ret)) {
461 LOG("Full cmd \u00ab%s\u00bb exited %d", full_cmd,
462 (int) WEXITSTATUS(fc_ret));
463 goto done;
466 /* Now *now is correct, files are where they need to be. */
467 ret = 0;
469 /* Cut base_path out for out_path and out_thumb_path */
470 if (filetype->thumb_creation_command) {
471 sprintf(thumb_path, "/%s/src/%jus.png",
472 conf->boards[board_idx].name, (uintmax_t) *now);
475 sprintf(system_path, "/%s/src/%ju.%s", conf->boards[board_idx].name,
476 (uintmax_t) *now, filetype->ext);
477 done:
479 if (temp_path) {
480 unlink(temp_path);
483 free(temp_path);
484 free(full_cmd);
485 free(thumb_cmd);
486 *out_abs_path = abs_path;
487 *out_path = system_path;
488 *out_path_len = strlen(*out_path);
489 *out_thumb_path = thumb_path;
490 *out_thumb_path_len = strlen(*out_thumb_path);
492 return ret;
496 * Clean up any memory from this file
498 * Postconditions (success):
500 * - Valgrind won't report any memory leas from this file.
502 * - setup_sanitize_file() can safely be called again.
504 int clean_sanitize_file(void)
506 /* XXX: is this safe if errors in setup_sanitize_file()? */
507 magic_close(mime_cookie);
508 mime_cookie = (magic_t) { 0 };
509 conf = 0;
512 * Note: we explicitly don't unlink temp_dir. If this thing
513 * crashes, I want to know what's there. It will certainly
514 * be the file of whatever caused us to crash. That's like
515 * a log file, and we shouldn't delete logs.
517 free(temp_dir);
518 temp_dir = 0;
520 return 0;