Calls to strdup() allocate memory using malloc(), and there's a leak because we never...
[libjio.git] / libjio.c
blob a6fa8db7747124fbb7bc18cb1b0e55ea7e35cd1a
2 /*
3 * libjio - A library for Journaled I/O
4 * Alberto Bertogli (albertogli@telpin.com.ar)
5 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <libgen.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/uio.h>

#include "libjio.h"
23 * small util functions
/*
 * Byte-range locking in the spirit of lockf(), except that the region is
 * the absolute range starting at 'offset' (measured from the beginning of
 * the file) and spanning 'len' bytes, instead of being relative to the
 * current file position.
 *
 * 'cmd' is one of the lockf() commands:
 *   F_LOCK  - take a write lock, waiting for it if necessary
 *   F_TLOCK - take a write lock, failing instead of waiting
 *   F_ULOCK - release the lock
 * Any other command is ignored and 0 is returned. Otherwise the return
 * value is whatever the underlying fcntl() call returned.
 */
static off_t plockf(int fd, int cmd, off_t offset, off_t len)
{
	struct flock region;
	int request;

	switch (cmd) {
	case F_LOCK:
		region.l_type = F_WRLCK;
		request = F_SETLKW;
		break;
	case F_ULOCK:
		region.l_type = F_UNLCK;
		request = F_SETLKW;
		break;
	case F_TLOCK:
		region.l_type = F_WRLCK;
		request = F_SETLK;
		break;
	default:
		/* unknown command: nothing to do */
		return 0;
	}

	region.l_whence = SEEK_SET;
	region.l_start = offset;
	region.l_len = len;

	return fcntl(fd, request, &region);
}
/*
 * "Safe" pread(): keeps calling pread() until 'count' bytes have been read
 * from 'fd' starting at 'offset', so a short read is never returned unless
 * end of file was reached.
 *
 * Returns the number of bytes stored in 'buf' (less than 'count' only when
 * EOF was hit), or the negative value returned by pread() if any of the
 * underlying calls failed.
 */
static ssize_t spread(int fd, void *buf, size_t count, off_t offset)
{
	/* byte pointer: arithmetic on void * is not standard C */
	char *dst = buf;
	/* progress is tracked in size_t/ssize_t so that large requests are not
	 * truncated and comparisons against 'count' are not mixed-sign */
	size_t done = 0;
	ssize_t rv;

	while (done < count) {
		rv = pread(fd, dst + done, count - done, offset + (off_t) done);

		if (rv < 0)
			/* error */
			return rv;
		if (rv == 0)
			/* got EOF */
			break;

		/* incomplete read, keep on reading */
		done += (size_t) rv;
	}

	return (ssize_t) done;
}
/*
 * "Safe" pwrite(), the counterpart of spread(): keeps calling pwrite()
 * until all 'count' bytes of 'buf' have been written to 'fd' starting at
 * 'offset'.
 *
 * Returns 'count' on success. If any underlying pwrite() fails or writes
 * nothing, its return value (negative or 0) is returned instead, even if
 * earlier calls made partial progress.
 */
static ssize_t spwrite(int fd, void *buf, size_t count, off_t offset)
{
	/* byte pointer: arithmetic on void * is not standard C */
	const char *src = buf;
	/* progress is tracked in size_t/ssize_t so that large requests are not
	 * truncated and comparisons against 'count' are not mixed-sign */
	size_t done = 0;
	ssize_t rv;

	while (done < count) {
		rv = pwrite(fd, src + done, count - done, offset + (off_t) done);

		if (rv <= 0)
			/* error/nothing was written */
			return rv;

		/* incomplete write, keep on writing */
		done += (size_t) rv;
	}

	return (ssize_t) count;
}
/*
 * Build the journal directory name for 'filename' and store it in 'jdir',
 * which must have room for PATH_MAX bytes. The result has the form
 * "<dirname>/.<basename>.jio".
 *
 * Returns 1 on success, 0 if memory could not be allocated (in which case
 * 'jdir' is left untouched).
 */
static int get_jdir(char *filename, char *jdir)
{
	char *base_copy, *dir_copy;
	int ok = 0;

	/* basename() and dirname() may modify their argument, so each one
	 * works on its own private copy of the path */
	base_copy = strdup(filename);
	dir_copy = strdup(filename);

	if (base_copy != NULL && dir_copy != NULL) {
		snprintf(jdir, PATH_MAX, "%s/.%s.jio",
				dirname(dir_copy), basename(base_copy));
		ok = 1;
	}

	/* both copies are released on every path; free(NULL) is a no-op */
	free(base_copy);
	free(dir_copy);

	return ok;
}
/*
 * Build the name of the file that holds transaction 'tid' of 'filename'
 * and store it in 'jtfile', which must have room for PATH_MAX bytes. The
 * result has the form "<dirname>/.<basename>.jio/<tid>".
 *
 * Returns 1 on success, 0 if memory could not be allocated (in which case
 * 'jtfile' is left untouched).
 */
static int get_jtfile(char *filename, int tid, char *jtfile)
{
	char *base_copy, *dir_copy;
	int ok = 0;

	/* basename() and dirname() may modify their argument, so each one
	 * works on its own private copy of the path */
	base_copy = strdup(filename);
	dir_copy = strdup(filename);

	if (base_copy != NULL && dir_copy != NULL) {
		snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d",
				dirname(dir_copy), basename(base_copy), tid);
		ok = 1;
	}

	/* both copies are released on every path; free(NULL) is a no-op */
	free(base_copy);
	free(dir_copy);

	return ok;
}
151 /* gets a new transaction id */
152 static unsigned int get_tid(struct jfs *fs)
154 unsigned int curid;
155 int r, rv;
157 /* lock the whole file */
158 plockf(fs->jfd, F_LOCK, 0, 0);
160 /* read the current max. curid */
161 r = spread(fs->jfd, &curid, sizeof(curid), 0);
162 if (r != sizeof(curid)) {
163 rv = 0;
164 goto exit;
167 /* increment it and handle overflows */
168 rv = curid + 1;
169 if (rv == 0)
170 rv = 1;
172 /* write to the file descriptor */
173 r = spwrite(fs->jfd, &rv, sizeof(rv), 0);
174 if (r != sizeof(curid)) {
175 rv = 0;
176 goto exit;
179 exit:
180 plockf(fs->jfd, F_ULOCK, 0, 0);
181 return rv;
184 /* frees a transaction id */
185 static void free_tid(struct jfs *fs, unsigned int tid)
187 unsigned int curid, i;
188 int r;
189 char name[PATH_MAX];
191 /* lock the whole file */
192 plockf(fs->jfd, F_LOCK, 0, 0);
194 /* read the current max. curid */
195 r = spread(fs->jfd, &curid, sizeof(curid), 0);
196 if (r != sizeof(curid)) {
197 goto exit;
200 if (tid < curid) {
201 /* we're not freeing the max. curid, so we just return */
202 goto exit;
203 } else {
204 /* look up the new max. */
205 for (i = curid - 1; i > 0; i--) {
206 /* this can fail if we're low on mem, but we don't
207 * care checking here because the problem will come
208 * out later and we can fail more properly */
209 get_jtfile(fs->name, i, name);
210 if (access(name, R_OK | W_OK) == 0) {
211 curid = i;
212 break;
216 /* and save it */
217 r = spwrite(fs->jfd, &i, sizeof(i), 0);
218 if (r != sizeof(curid)) {
219 goto exit;
223 exit:
224 plockf(fs->jfd, F_ULOCK, 0, 0);
225 return;
230 * transaction functions
233 /* initialize a transaction structure */
234 void jtrans_init(struct jfs *fs, struct jtrans *ts)
236 ts->fs = fs;
237 ts->name = NULL;
238 ts->id = 0;
239 ts->flags = 0;
240 ts->buf = NULL;
241 ts->len = 0;
242 ts->offset = 0;
243 ts->udata = NULL;
244 ts->ulen = 0;
245 ts->pdata = NULL;
246 ts->plen = 0;
249 /* free a transaction structure */
250 void jtrans_free(struct jtrans *ts)
252 /* NOTE: we only really free the name and previous data, which are the
253 * things _we_ allocate; the user data is caller stuff */
254 ts->fs = NULL;
255 if (ts->name)
256 free(ts->name);
257 if (ts->pdata)
258 free(ts->pdata);
259 free(ts);
262 /* commit a transaction */
263 int jtrans_commit(struct jtrans *ts)
265 int id, fd, rv, t;
266 char *name;
267 void *buf_init, *bufp;
269 name = (char *) malloc(PATH_MAX);
270 if (name == NULL)
271 return -1;
273 id = get_tid(ts->fs);
274 if (id == 0)
275 return -1;
277 /* open the transaction file */
278 if (!get_jtfile(ts->fs->name, id, name))
279 return -1;
280 fd = open(name, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0600);
281 if (fd < 0)
282 return -1;
284 /* and lock it */
285 plockf(fd, F_LOCK, 0, 0);
287 ts->id = id;
288 ts->name = name;
290 /* lock the file region to work on */
291 if (!(ts->fs->flags & J_NOLOCK))
292 plockf(ts->fs->fd, F_LOCK, ts->offset, ts->len);
294 /* first the static data */
296 buf_init = malloc(J_DISKTFIXSIZE);
297 if (buf_init == NULL)
298 return -1;
300 bufp = buf_init;
302 memcpy(bufp, (void *) &(ts->id), sizeof(ts->id));
303 bufp += 4;
305 memcpy(bufp, (void *) &(ts->flags), sizeof(ts->flags));
306 bufp += 4;
308 memcpy(bufp, (void *) &(ts->len), sizeof(ts->len));
309 bufp += 4;
311 memcpy(bufp, (void *) &(ts->ulen), sizeof(ts->ulen));
312 bufp += 4;
314 memcpy(bufp, (void *) &(ts->offset), sizeof(ts->offset));
315 bufp += 8;
317 rv = spwrite(fd, buf_init, J_DISKTFIXSIZE, 0);
318 if (rv != J_DISKTFIXSIZE)
319 goto exit;
321 free(buf_init);
324 /* and now the variable part */
326 if (ts->udata) {
327 rv = spwrite(fd, ts->udata, ts->ulen, J_DISKTFIXSIZE);
328 if (rv != ts->ulen)
329 goto exit;
332 ts->pdata = malloc(ts->len);
333 if (ts->pdata == NULL)
334 goto exit;
336 ts->plen = ts->len;
338 /* copy the current content into the transaction file */
339 rv = spread(ts->fs->fd, ts->pdata, ts->len, ts->offset);
340 if (rv < 0)
341 goto exit;
342 if (rv < ts->len) {
343 /* we are extending the file! use ftruncate() to do it */
344 ftruncate(ts->fs->fd, ts->offset + ts->len);
346 ts->plen = rv;
350 t = J_DISKTFIXSIZE + ts->ulen;
351 rv = spwrite(fd, ts->pdata, ts->len, t);
352 if (rv != ts->len)
353 goto exit;
355 /* save the new data in the transaction file */
356 t = J_DISKTFIXSIZE + ts->ulen + ts->plen;
357 rv = spwrite(fd, ts->buf, ts->len, t);
358 if (rv != ts->len)
359 goto exit;
361 /* this is a simple but efficient optimization: instead of doing
362 * everything O_SYNC, we sync at this point only, this way we avoid
363 * doing a lot of very small writes; in case of a crash the
364 * transaction file is only useful if it's complete (ie. after this
365 * point) so we only flush here */
366 fsync(fd);
368 /* now that we have a safe transaction file, let's apply it */
369 rv = spwrite(ts->fs->fd, ts->buf, ts->len, ts->offset);
370 if (rv != ts->len)
371 goto exit;
373 /* mark the transaction as commited */
374 ts->flags = ts->flags | J_COMMITED;
376 /* the transaction has been applied, so we cleanup and remove it from
377 * the disk */
378 free_tid(ts->fs, ts->id);
379 unlink(name);
381 exit:
382 close(fd);
384 if (!(ts->fs->flags & J_NOLOCK))
385 plockf(ts->fs->fd, F_ULOCK, ts->offset, ts->len);
387 /* return the lenght only if it was properly commited */
388 if (ts->flags & J_COMMITED)
389 return ts->len;
390 else
391 return -1;
395 /* rollback a transaction */
396 int jtrans_rollback(struct jtrans *ts)
398 int rv;
399 struct jtrans newts;
401 /* copy the old transaction to the new one */
402 jtrans_init(ts->fs, &newts);
404 newts.name = malloc(strlen(ts->name));
405 if (newts.name == NULL)
406 return -1;
408 strcpy(newts.name, ts->name);
409 newts.flags = ts->flags;
410 newts.offset = ts->offset;
412 newts.buf = ts->pdata;
413 newts.len = ts->plen;
415 if (ts->plen < ts->len) {
416 /* we extended the data in the previous transaction, so we
417 * should truncate it back */
418 /* DANGEROUS: this is one of the main reasons why rollbacking
419 * is dangerous and should only be done with extreme caution:
420 * if for some reason, after the previous transacton, we have
421 * extended the file further, this will cut it back to what it
422 * was; read the docs for more detail */
423 ftruncate(ts->fs->fd, ts->offset + ts->plen);
427 newts.pdata = ts->buf;
428 newts.plen = ts->len;
430 newts.udata = ts->udata;
431 newts.ulen = ts->ulen;
433 rv = jtrans_commit(&newts);
434 return rv;
439 * basic operations
442 /* open a file */
443 int jopen(struct jfs *fs, char *name, int flags, int mode, int jflags)
445 int fd, jfd, rv;
446 unsigned int t;
447 char jdir[PATH_MAX], jlockfile[PATH_MAX];
448 struct stat sinfo;
450 fd = open(name, flags, mode);
451 if (fd < 0)
452 return -1;
454 fs->fd = fd;
455 fs->name = name;
456 fs->flags = jflags;
458 pthread_mutex_init( &(fs->lock), NULL);
460 if (!get_jdir(name, jdir))
461 return -1;
462 rv = mkdir(jdir, 0750);
463 rv = lstat(jdir, &sinfo);
464 if (rv < 0 || !S_ISDIR(sinfo.st_mode))
465 return -1;
467 snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
468 if (access(jlockfile, F_OK) != 0) {
469 /* file doesn't exists, create it */
470 jfd = open(jlockfile, O_RDWR | O_CREAT | O_SYNC, 0600);
471 } else {
472 jfd = open(jlockfile, O_RDWR | O_SYNC, 0600);
474 if (jfd < 0)
475 return -1;
477 /* initialize the lock file by writing the first tid to it, but only
478 * if its empty, otherwise there is a race if two processes call
479 * jopen() simultaneously and both initialize the file */
480 plockf(jfd, F_LOCK, 0, 0);
481 lstat(jlockfile, &sinfo);
482 if (sinfo.st_size == 0) {
483 t = 1;
484 rv = write(jfd, &t, sizeof(t));
485 if (rv != sizeof(t)) {
486 plockf(jfd, F_ULOCK, 0, 0);
487 return -1;
490 plockf(jfd, F_ULOCK, 0, 0);
492 fs->jfd = jfd;
494 return fd;
497 /* read wrapper */
498 ssize_t jread(struct jfs *fs, void *buf, size_t count)
500 int rv;
501 pthread_mutex_lock(&(fs->lock));
502 lockf(fs->fd, F_LOCK, count);
503 rv = read(fs->fd, buf, count);
504 lockf(fs->fd, F_ULOCK, -count);
505 pthread_mutex_unlock(&(fs->lock));
507 return rv;
510 /* pread wrapper */
511 ssize_t jpread(struct jfs *fs, void *buf, size_t count, off_t offset)
513 int rv;
514 plockf(fs->fd, F_LOCK, offset, count);
515 rv = pread(fs->fd, buf, count, offset);
516 plockf(fs->fd, F_ULOCK, offset, count);
518 return rv;
521 /* readv wrapper */
522 ssize_t jreadv(struct jfs *fs, struct iovec *vector, int count)
524 int rv, i;
525 size_t sum;
527 sum = 0;
528 for (i = 0; i < count; i++)
529 sum += vector[i].iov_len;
531 pthread_mutex_lock(&(fs->lock));
532 lockf(fs->fd, F_LOCK, sum);
533 rv = readv(fs->fd, vector, count);
534 lockf(fs->fd, F_ULOCK, -sum);
535 pthread_mutex_unlock(&(fs->lock));
537 return rv;
540 /* write wrapper */
541 ssize_t jwrite(struct jfs *fs, void *buf, size_t count)
543 int rv;
544 off_t pos;
545 struct jtrans ts;
547 pthread_mutex_lock(&(fs->lock));
549 jtrans_init(fs, &ts);
550 pos = lseek(fs->fd, 0, SEEK_CUR);
551 ts.offset = pos;
553 ts.buf = buf;
554 ts.len = count;
556 rv = jtrans_commit(&ts);
558 pthread_mutex_unlock(&(fs->lock));
559 return rv;
562 /* pwrite wrapper */
563 ssize_t jpwrite(struct jfs *fs, void *buf, size_t count, off_t offset)
565 int rv;
566 struct jtrans ts;
568 pthread_mutex_lock(&(fs->lock));
570 jtrans_init(fs, &ts);
571 ts.offset = offset;
573 ts.buf = buf;
574 ts.len = count;
576 rv = jtrans_commit(&ts);
578 pthread_mutex_unlock(&(fs->lock));
579 return rv;
582 /* writev wrapper */
583 ssize_t jwritev(struct jfs *fs, struct iovec *vector, int count)
585 int rv, i, bufp;
586 ssize_t sum;
587 char *buf;
588 off_t pos;
589 struct jtrans ts;
591 sum = 0;
592 for (i = 0; i < count; i++)
593 sum += vector[i].iov_len;
595 /* unify the buffers into one big chunk to commit */
596 /* FIXME: can't we do this more efficient? It ruins the whole purpose
597 * of using writev() :\
598 * maybe we should do one transaction per vector */
599 buf = malloc(sum);
600 if (buf == NULL)
601 return -1;
602 bufp = 0;
604 for (i = 0; i < count; i++) {
605 memcpy(buf + bufp, vector[i].iov_base, vector[i].iov_len);
606 bufp += vector[i].iov_len;
609 pthread_mutex_lock(&(fs->lock));
611 jtrans_init(fs, &ts);
612 pos = lseek(fs->fd, 0, SEEK_CUR);
613 ts.offset = pos;
615 ts.buf = buf;
616 ts.len = sum;
618 rv = jtrans_commit(&ts);
620 pthread_mutex_unlock(&(fs->lock));
621 return rv;
625 /* truncate a file - be careful with this */
626 int jtruncate(struct jfs *fs, off_t lenght)
628 int rv;
630 /* lock from lenght to the end of file */
631 plockf(fs->fd, F_LOCK, lenght, 0);
632 rv = ftruncate(fs->fd, lenght);
633 plockf(fs->fd, F_ULOCK, lenght, 0);
635 return rv;
638 /* close a file */
639 int jclose(struct jfs *fs)
641 if (close(fs->fd))
642 return -1;
643 if (close(fs->jfd))
644 return -1;
645 return 0;
650 * journal recovery
653 /* check the journal and replay the incomplete transactions */
654 int jfsck(char *name, struct jfsck_result *res)
656 int fd, jfd, tfd, rv, i, maxtid;
657 char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX];
658 char *buf = NULL;
659 struct stat sinfo;
660 struct jfs fs;
661 struct jtrans *curts;
662 DIR *dir;
663 off_t offset;
665 fd = open(name, O_RDWR | O_SYNC | O_LARGEFILE);
666 if (fd < 0)
667 return J_ENOENT;
669 fs.fd = fd;
670 fs.name = name;
672 if (!get_jdir(name, jdir))
673 return J_ENOMEM;
674 rv = lstat(jdir, &sinfo);
675 if (rv < 0 || !S_ISDIR(sinfo.st_mode))
676 return J_ENOJOURNAL;
678 snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
679 jfd = open(jlockfile, O_RDWR | O_SYNC, 0600);
680 if (jfd < 0)
681 return J_ENOJOURNAL;
683 lstat(jlockfile, &sinfo);
684 if (sinfo.st_size == 0)
685 return J_ENOJOURNAL;
687 plockf(jfd, F_LOCK, 0, 0);
688 rv = spread(jfd, &maxtid, sizeof(maxtid), 0);
689 if (rv != sizeof(maxtid)) {
690 return J_ENOJOURNAL;
692 plockf(jfd, F_ULOCK, 0, 0);
694 fs.jfd = jfd;
696 dir = opendir(jdir);
697 if (dir == NULL)
698 return J_ENOJOURNAL;
700 /* we loop all the way up to the max transaction id */
701 for (i = 1; i <= maxtid; i++) {
702 curts = malloc(sizeof(struct jtrans));
703 if (curts == NULL)
704 return J_ENOMEM;
706 jtrans_init(&fs, curts);
707 curts->id = i;
709 /* open the transaction file, using i as its name, so we are
710 * really looping in order (recovering transaction in a
711 * different order as they were applied means instant
712 * corruption) */
713 if (!get_jtfile(name, i, tname))
714 return J_ENOMEM;
715 tfd = open(tname, O_RDWR | O_SYNC | O_LARGEFILE, 0600);
716 if (tfd < 0) {
717 res->invalid++;
718 goto loop;
721 /* try to lock the transaction file, if it's locked then it is
722 * currently being used so we skip it */
723 rv = plockf(fd, F_TLOCK, 0, 0);
724 if (rv == -1) {
725 res->in_progress++;
726 goto loop;
729 curts->name = tname;
731 /* load from disk, header first */
732 buf = (char *) malloc(J_DISKTFIXSIZE);
733 if (buf == NULL) {
734 res->load_error++;
735 goto loop;
738 rv = read(tfd, buf, J_DISKTFIXSIZE);
739 if (rv != J_DISKTFIXSIZE) {
740 res->broken_head++;
741 goto loop;
744 curts->flags = (int) *(buf + 4);
745 curts->len = (size_t) *(buf + 8);
746 curts->ulen = (size_t) *(buf + 16);
747 curts->offset = (off_t) *(buf + 20);
749 /* if we got here, the transaction was not applied, so we
750 * check if the transaction file is complete (we only need to
751 * apply it) or not (so we can't do anything but ignore it) */
753 lstat(tname, &sinfo);
754 rv = J_DISKTFIXSIZE + curts->len + curts->ulen + curts->plen;
755 if (sinfo.st_size != rv) {
756 /* the transaction file is incomplete, some of the
757 * body is missing */
758 res->broken_body++;
759 goto loop;
762 /* we have a complete transaction file which commit was not
763 * successful, so we read it to complete the transaction
764 * structure and apply it again */
765 curts->buf = malloc(curts->len);
766 if (curts->buf == NULL) {
767 res->load_error++;
768 goto loop;
771 curts->pdata = malloc(curts->plen);
772 if (curts->pdata == NULL) {
773 res->load_error++;
774 goto loop;
777 curts->udata = malloc(curts->ulen);
778 if (curts->udata == NULL) {
779 res->load_error++;
780 goto loop;
783 /* user data */
784 offset = J_DISKTFIXSIZE;
785 rv = spread(tfd, curts->udata, curts->ulen, offset);
786 if (rv != curts->ulen) {
787 printf("ULEN\n");
788 res->load_error++;
789 goto loop;
792 /* previous data */
793 offset = J_DISKTFIXSIZE + curts->ulen;
794 rv = spread(tfd, curts->pdata, curts->plen, offset);
795 if (rv != curts->plen) {
796 printf("PLEN\n");
797 res->load_error++;
798 goto loop;
801 /* real data */
802 offset = J_DISKTFIXSIZE + curts->ulen + curts->plen;
803 rv = spread(tfd, curts->buf, curts->len, offset);
804 if (rv != curts->len) {
805 res->load_error++;
806 goto loop;
809 /* apply */
810 rv = jtrans_commit(curts);
811 if (rv < 0) {
812 res->apply_error++;
813 goto loop;
815 res->reapplied++;
817 /* free the data we just allocated */
818 if (curts->len)
819 free(curts->buf);
820 if (curts->plen)
821 free(curts->pdata);
822 if (curts->ulen)
823 free(curts->udata);
825 loop:
826 if (tfd > 0)
827 close(tfd);
829 res->total++;
830 if (buf)
831 free(buf);
832 free(curts);
835 return 0;