server: tie regex filters to db_insert_ban
[rb-79.git] / sanitize-file.c
blob7aa9faa5d1bcb3430d3242629139ba0c0fe01050
1 /*
2 * Copyright (c) 2017-2020, De Rais <derais@cock.li>
4 * Permission to use, copy, modify, and/or distribute this software for
5 * any purpose with or without fee is hereby granted, provided that the
6 * above copyright notice and this permission notice appear in all
7 * copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
10 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
11 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
12 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
13 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
14 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
15 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
18 #include <errno.h>
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <time.h>
26 #include <unistd.h>
28 #include <magic.h>
30 #include "macros.h"
31 #include "rb79.h"
33 static magic_t mime_cookie;
34 static char *temp_dir;
36 /* Global configuration */
37 const struct configuration *conf;
39 /* A buffer this long can hold any static_thumbnail path */
40 static size_t max_static_thumb_len;
43 * Set up libmagic and all its friends
45 * Preconditions:
47 * - setup_sanitize_file() was not invoked more recently than
48 * clean_sanitize_file().
50 * Postconditions (success):
52 * - Any other function in this file may be safely called.
54 int
55 setup_sanitize_file(const struct configuration *in_conf)
57 conf = in_conf;
58 max_static_thumb_len = 1;
60 for (size_t j = 0; j < conf->filetypes_num; j++) {
61 const struct filetype *f = &conf->filetypes[j];
63 if (f->static_thumbnail) {
64 size_t t = strlen(f->static_thumbnail);
66 if (t + 1 > max_static_thumb_len) {
67 max_static_thumb_len = t + 1;
72 if (!(mime_cookie = magic_open(MAGIC_MIME_TYPE))) {
73 ERROR_MESSAGE("magic_open(): %s", magic_error(mime_cookie));
75 return -1;
78 if (magic_load(mime_cookie, 0) < 0) {
79 ERROR_MESSAGE("magic_open(): %s", magic_error(mime_cookie));
81 return -1;
84 if (!(temp_dir = malloc(strlen(conf->temp_dir_template) + 1))) {
85 return -1;
88 sprintf(temp_dir, "%s", conf->temp_dir_template);
90 if (!(mkdtemp(temp_dir))) {
91 PERROR_MESSAGE("mkdtemp");
93 return -1;
96 return 0;
100 * Check the MIME type of the content of buf
102 * Preconditions:
104 * - setup_sanitize_file() was invoked more recently than
105 * clean_sanitize_file().
107 * - buf is memory of length len.
109 * - out_filetype is not 0.
111 * - Overwriting *out_filetype shall not cause a memory leak.
113 * Postconditions (success):
115 * - The contents of buf have been examined by libmagic. If the
116 * MIME type corresponds to an entry in allowed_filetypes,
117 * *out_filetype is that entry. If not, *out_filetype is 0.
120 sf_check_mime_type(const char *buf, size_t len, const struct
121 filetype **out_filetype)
123 const char *mime = 0;
125 if (!buf ||
126 !len) {
127 *out_filetype = 0;
129 return 0;
132 if (!(mime = magic_buffer(mime_cookie, buf, len))) {
133 ERROR_MESSAGE("magic_buffer(): %s", magic_error(mime_cookie));
135 return -1;
138 for (size_t j = 0; j < conf->filetypes_num; ++j) {
139 if (!strcmp(mime, conf->filetypes[j].mime_type)) {
140 *out_filetype = &conf->filetypes[j];
142 return 0;
146 LOG("The mime type \"%s\" is unsupported", mime);
148 return -1;
152 * Run file_description_prog, store the result in out_description
154 * It is intended that this be called lazily by write-thread, because
155 * the standard posting path (at present) inserts into the database
156 * before saving files (to reduce error paths).
158 * Preconditions:
160 * - setup_sanitize_file() was invoked more recently than
161 * clean_sanitize_file().
163 * - mimetype is a string of length mimetype_len, and filepath is
164 * a string of length filepath_len.
166 * - filepath is the absolute path to the file in question, so is
167 * something like "/var/www/rb79/m/src/1942067545.jpg".
169 * - The mimetype of that file is mimetype (this should be satisfied
170 * if both strings come from a db row), so is something like
171 * "image/jpeg".
173 * - out_description and out_len are not 0.
175 * - Overwriting *out_description shall not cause a memory leak.
177 * Postconditions (success):
179 * - *out_description is a string of length *out_len, and is
180 * suitable for inclusion directly into HTML.
183 sf_describe_file(const char *mimetype, const char *filepath,
184 char **out_description, size_t *out_description_len)
186 /* XXX: do we need to do any signal magic here? */
187 int ret = -1;
188 int fds[2];
189 pid_t fret = 0;
190 ssize_t rret = 0;
191 size_t pos = 0;
192 size_t dlen = 128;
194 if (!filepath ||
195 !mimetype) {
196 ret = 0;
197 goto done;
200 if (pipe(fds) < 0) {
201 PERROR_MESSAGE("pipe");
202 goto done;
205 /* XXX: should we have an automagically growing buffer? */
206 if (!(*out_description = calloc(dlen, sizeof **out_description))) {
207 PERROR_MESSAGE("calloc");
208 goto done;
211 if ((fret = fork()) == -1) {
212 PERROR_MESSAGE("fork");
213 goto done;
216 if (!fret) {
217 /* We are child: ``1>fds[1] 2>&1'' */
218 close(fds[0]);
219 dup2(fds[1], STDOUT_FILENO);
220 dup2(fds[1], STDERR_FILENO);
221 execlp(conf->file_description_prog, conf->file_description_prog,
222 mimetype, filepath, (const char *) 0);
224 /* An error has occured but we dare not log */
225 _exit(0);
228 /* We are parent */
229 close(fds[1]);
231 do {
232 rret = read(fds[0], *out_description + pos, dlen - pos - 1);
234 if (rret == -1) {
235 close(fds[0]);
236 PERROR_MESSAGE("read");
237 goto done;
240 pos += rret;
242 if (pos >= dlen - 1) {
243 LOG("$(%s %s %s) is too long, aborting",
244 conf->file_description_prog, mimetype, filepath);
245 break;
247 } while (rret);
249 /* If file_description_prog is rampant, this blocks. */
250 waitpid(fret, 0, 0);
251 (*out_description)[pos] = '\0';
252 *out_description_len = pos;
253 close(fds[0]);
254 ret = 0;
255 done:
257 return ret;
261 * Treat the contents of buf as a file. Run board.install_command
262 * and board.thumb_creation_command (if relevant) to place it at
263 * some place like /var/www/rb79/sf/src/2684425523.png. Record that
264 * full path in *out_abs_path. Record a truncated version, suitable
265 * for use in <a>, into *out_path. Record the thumbnail's path (also
266 * suitable for <a>) in *out_thumb_path.
268 * Preconditions:
270 * - setup_sanitize_file() was invoked more recently than
271 * clean_sanitize_file().
273 * - board_idx represents a board, AND THE LOCK IS HELD.
275 * - buf is memory of length len, and when treated as a file has
276 * MIME type corresponding to *filetype.
278 * - now, out_abs_path, out_path, out_thumb_path, our_fault are
279 * not 0.
281 * - *now does not correspond to a timestamp of any other file
282 * (otherwise the filenames will clobber).
284 * - Overwriting *out_abs_path, *out_path, and *out_thumb_path
285 * shall not cause a memory leak.
287 * Postconditions (success):
289 * - *out_abs_path is an absolute path, like
290 * "/var/www/rb79/sf/src/12343252.jpg".
292 * - *out_path is a path suitable for use in <a>, like
293 * "/sf/src/12343242.jpg".
295 * - *out_thumb_path is also a path suitable for use in <a>.
297 * - *out_path_len and *out_thumb_path_len are the relevant lengths
298 * (note there is no out length for *out_abs_path.
300 * - buf was written to disk, and filetype's .install_command was
301 * run with that file as the first argument and *out_abs_path
302 * as the second argument.
304 * - If filetype.thumb_creation_command is not 0, it was run with
305 * that file as the first argument and the file corresponding
306 * to *out_thumb_path as the second.
308 * - Otherwise, *out_thumb_path is a copy of filetype.static_thumbnail.
311 sf_install_files(size_t board_idx, const char *buf, size_t len, time_t *now,
312 const struct filetype *filetype, char **out_abs_path,
313 char **out_path,
314 size_t *out_path_len, char **out_thumb_path,
315 size_t *out_thumb_path_len,
316 int *our_fault)
318 int ret = -1;
319 size_t abs_path_len = snprintf(0, 0, "%s/%s/src/%ju.%s",
320 conf->static_www_folder,
321 conf->boards[board_idx].name,
322 (uintmax_t) -1,
323 filetype->ext);
324 size_t thumb_path_len = snprintf(0, 0, "%s/%s/src/%jus.png",
325 conf->static_www_folder,
326 conf->boards[board_idx].name,
327 (uintmax_t) -1);
328 size_t temp_path_len = snprintf(0, 0, "%s/%ju.%s", temp_dir,
329 (uintmax_t) -1, filetype->ext);
331 if (max_static_thumb_len > thumb_path_len) {
332 thumb_path_len = max_static_thumb_len;
335 char *abs_path = 0;
336 char *system_path = 0;
337 char *thumb_path = 0;
338 char *temp_path = 0;
339 size_t thumb_cmd_len = 0;
340 char *thumb_cmd = 0;
341 size_t full_cmd_len = 0;
342 char *full_cmd = 0;
343 FILE *out = 0;
345 if (abs_path_len + 1 < abs_path_len) {
346 ERROR_MESSAGE("overflow");
347 *our_fault = 1;
348 goto done;
351 if (!(abs_path = malloc(abs_path_len + 1))) {
352 PERROR_MESSAGE("malloc");
353 *our_fault = 1;
354 goto done;
357 if (!(system_path = malloc(abs_path_len + 1))) {
358 PERROR_MESSAGE("malloc");
359 *our_fault = 1;
360 goto done;
363 if (thumb_path_len + 1 < thumb_path_len) {
364 ERROR_MESSAGE("overflow");
365 *our_fault = 1;
366 goto done;
369 if (!(thumb_path = malloc(thumb_path_len + 1))) {
370 PERROR_MESSAGE("malloc");
371 *our_fault = 1;
372 goto done;
375 if (temp_path_len + 1 < temp_path_len) {
376 ERROR_MESSAGE("overflow");
377 *our_fault = 1;
378 goto done;
381 if (!(temp_path = malloc(temp_path_len + 1))) {
382 PERROR_MESSAGE("malloc");
383 *our_fault = 1;
384 goto done;
387 while (1) {
388 FILE *f = 0;
390 sprintf(abs_path, "%s/%s/src/%ju.%s", conf->static_www_folder,
391 conf->boards[board_idx].name, (uintmax_t) *now,
392 filetype->ext);
393 f = fopen(abs_path, "r");
395 if (f) {
397 * Since we have the filesystem lock, this
398 * should correctly avoid file clobbering.
400 if (fclose(f)) {
401 PERROR_MESSAGE("fclose");
402 *our_fault = 1;
403 goto done;
406 (*now)++;
407 continue;
410 if (errno == ENOENT) {
411 break;
414 PERROR_MESSAGE("fopen");
415 *our_fault = 1;
416 goto done;
419 /* At this point, *now and abs_path are correct */
420 sprintf(thumb_path, "%s/%s/src/%jus.png", conf->static_www_folder,
421 conf->boards[board_idx].name, (uintmax_t) *now);
422 sprintf(temp_path, "%s/%ju.%s", temp_dir, (uintmax_t) *now,
423 filetype->ext);
425 if (!(out = fopen(temp_path, "w"))) {
426 PERROR_MESSAGE("fopen");
427 ERROR_MESSAGE("cannot open path \"%s\"", temp_path);
428 *our_fault = 1;
429 goto done;
432 if (fwrite(buf, 1, len, out) < len) {
433 PERROR_MESSAGE("fwrite");
434 ERROR_MESSAGE("cannot write to path \"%s\"", temp_path);
435 *our_fault = 1;
436 goto done;
439 if (fclose(out)) {
440 PERROR_MESSAGE("fclose");
441 *our_fault = 1;
442 goto done;
445 /* Now we have something on the filesystem */
446 if (filetype->thumb_creation_command) {
447 thumb_cmd_len = snprintf(0, 0, filetype->thumb_creation_command,
448 temp_path, thumb_path);
450 if (!(thumb_cmd = malloc(thumb_cmd_len + 1))) {
451 PERROR_MESSAGE("malloc");
452 *our_fault = 1;
453 goto done;
456 sprintf(thumb_cmd, filetype->thumb_creation_command, temp_path,
457 thumb_path);
458 int tc_ret = system(thumb_cmd);
460 if (!WIFEXITED(tc_ret)) {
461 ERROR_MESSAGE(
462 "Thumnail cmd \u00ab%s\u00bb did not exit",
463 thumb_cmd);
464 LOG("Thumnail cmd \u00ab%s\u00bb did not exit",
465 thumb_cmd);
466 goto done;
467 } else if (WEXITSTATUS(tc_ret)) {
468 LOG("Thumnail cmd \u00ab%s\u00bb exited %d", thumb_cmd,
469 (int) WEXITSTATUS(tc_ret));
470 goto done;
472 } else if (filetype->static_thumbnail) {
473 sprintf(thumb_path, "%s", filetype->static_thumbnail);
476 full_cmd_len = snprintf(0, 0, filetype->install_command, temp_path,
477 abs_path);
479 if (full_cmd_len + 1 < full_cmd_len) {
480 ERROR_MESSAGE("overflow");
481 *our_fault = 1;
482 goto done;
485 if (!(full_cmd = malloc(full_cmd_len + 1))) {
486 PERROR_MESSAGE("malloc");
487 *our_fault = 1;
488 goto done;
491 sprintf(full_cmd, filetype->install_command, temp_path, abs_path);
492 int fc_ret = system(full_cmd);
494 if (!WIFEXITED(fc_ret)) {
495 ERROR_MESSAGE("Install cmd \u00ab%s\u00bb did not exit",
496 full_cmd);
497 LOG("Install cmd \u00ab%s\u00bb did not exit", full_cmd);
498 goto done;
499 } else if (WEXITSTATUS(fc_ret)) {
500 LOG("Full cmd \u00ab%s\u00bb exited %d", full_cmd,
501 (int) WEXITSTATUS(fc_ret));
502 goto done;
505 /* Now *now is correct, files are where they need to be. */
506 ret = 0;
508 /* Cut base_path out for out_path and out_thumb_path */
509 if (filetype->thumb_creation_command) {
510 sprintf(thumb_path, "/%s/src/%jus.png",
511 conf->boards[board_idx].name, (uintmax_t) *now);
514 sprintf(system_path, "/%s/src/%ju.%s", conf->boards[board_idx].name,
515 (uintmax_t) *now, filetype->ext);
516 done:
518 if (temp_path) {
519 unlink(temp_path);
522 free(temp_path);
523 free(full_cmd);
524 free(thumb_cmd);
525 *out_abs_path = abs_path;
526 *out_path = system_path;
527 *out_path_len = strlen(*out_path);
528 *out_thumb_path = thumb_path;
529 *out_thumb_path_len = strlen(*out_thumb_path);
531 return ret;
535 * Clean up any memory from this file
537 * Postconditions (success):
539 * - Valgrind won't report any memory leas from this file.
541 * - setup_sanitize_file() can safely be called again.
544 clean_sanitize_file(void)
546 /* XXX: is this safe if errors in setup_sanitize_file()? */
547 magic_close(mime_cookie);
548 mime_cookie = (magic_t) { 0 };
549 conf = 0;
552 * Note: we explicitly don't unlink temp_dir. If this thing
553 * crashes, I want to know what's there. It will certainly
554 * be the file of whatever caused us to crash. That's like
555 * a log file, and we shouldn't delete logs.
557 free(temp_dir);
558 temp_dir = 0;
560 return 0;