Fix block insertion in replace_chunk
[hed.git] / libhed / file.c

/* $Id$ */

/*
 * hed - Hexadecimal editor
 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * There hammer on the anvil smote,
 * There chisel clove, and graver wrote;
 * There forged was blade, and bound was hilt;
 * The delver mined, the mason built.
 * There beryl, pearl, and opal pale,
 * And metal wrought like fishes' mail,
 * Buckler and corslet, axe and sword,
 * And shining spears were laid in hoard.
 */

/* Feature macros needed for:
 *  - memrchr
 *  - pread, pwrite
 *  - stpcpy
 */
#define _GNU_SOURCE

#include <config.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* BLKGETSIZE and BLKGETSIZE64 */

#include "file.h"
#include "access.h"
#include "cache.h"
#include "swap.h"
#include "tree.h"
#include "expr.h"

/* memrchr() might not be available */
#ifndef HAVE_MEMRCHR
# include "memrchr.c"
#endif /* HAVE_MEMRCHR */

/*
 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
 * there is nothing left in the upper halls. And since the dwarves fled, no one
 * dares to seek the shafts and treasuries down in the deep places: they are
 * drowned in water--or in a shadow of fear.'
 */

/* TODO: Currently the file blocks allocation is not very sophisticated and
 * when the weather is bad it could probably have rather horrible results. */

#undef BLOCKS_DEBUG
#ifdef BLOCKS_DEBUG
#define BDEBUG(x...) fprintf(stderr, x)
#else
#define BDEBUG(x...)
#endif

/* Number of blocks in cache */
#define CACHE_LENGTH 64

/* Blocks for readahead */
#define FILE_READAHEAD (CACHE_LENGTH/2)

#define file_block hed_block
#define blockoff_t hed_cursor_t

#define first_block(f) next_block(last_block(f))
#define prev_block(b) (tree_entry(prev_in_tree(&(b)->t),struct file_block,t))
#define next_block(b) (tree_entry(next_in_tree(&(b)->t),struct file_block,t))
#define last_block(f) (&(f)->eof_block)

#define block_offset(b) tree_block_offset(&(b)->t)

#define recalc_block_recursive(b) recalc_node_recursive(&(b)->t)

#define chain_block_before(tree,b,p) insert_into_tree((tree), &(b)->t, &(p)->t)
#define recalc_chain_block_before(tree,b,p) do {	\
		chain_block_before((tree), (b), (p));	\
		recalc_block_recursive((b));		\
	} while (0)

#define unchain_block(tree,b) del_from_tree((tree), &(b)->t)
#define recalc_unchain_block(tree,b) recalc_del_from_tree((tree), &(b)->t)

#define init_block_list(tree,b) init_tree(tree, &(b)->t)
#define init_block_link(b) init_node(&(b)->t)

#define find_block(tree,o) tree_entry(find_in_tree((tree),(o)),struct file_block,t)

#define file_size hed_file_size
#define file_blocks hed_file_blocks
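
/*
 * Illustrative sketch (not part of the original source): the macros above
 * expose the block tree as a doubly-linked sequence anchored at the special
 * EOF block, so a whole-file walk looks like this, assuming a valid
 * struct hed_file *file with an initialized block tree:
 *
 *	struct hed_block *b;
 *	for (b = first_block(file); !hed_block_is_eof(b); b = next_block(b))
 *		BDEBUG("block at %llx, size %llx\n",
 *		       (unsigned long long) b->phys_pos,
 *		       (unsigned long long) hed_block_size(b));
 */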

#ifdef HED_CONFIG_SWAP

/* Return the swp file object */
static inline struct swp_file *
file_swp(struct hed_file *file)
{
	return file->swp;
}

#else

/* Provide a stub for the non-swap case */
static inline void *
file_swp(struct hed_file *file)
{
	return NULL;
}

#endif

#ifdef HED_CONFIG_READAHEAD

#define file_ra_none(f)     ((f)->readahead == HED_RA_NONE)
#define file_ra_forward(f)  ((f)->readahead == HED_RA_FORWARD)
#define file_ra_backward(f) ((f)->readahead == HED_RA_BACKWARD)

#else

#define file_ra_none(f)     (1)
#define file_ra_forward(f)  (0)
#define file_ra_backward(f) (0)

#endif /* HED_CONFIG_READAHEAD */

/* Get the physical offset of the byte immediately following @block. */
static inline hed_uoff_t
phys_end(const struct hed_block *block)
{
	return hed_block_is_inserted(block)
		? block->phys_pos
		: block->phys_pos + hed_block_size(block);
}

static struct hed_block *
next_nonzero_block(struct hed_block *block)
{
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (hed_block_size(block))
			return block;
	}
	return NULL;
}

static struct hed_block *
prev_nonzero_block(struct hed_block *block)
{
	do {
		block = prev_block(block);
		if (hed_block_is_eof(block))
			return NULL;
	} while (!hed_block_size(block));
	return block;
}

bool
hed_block_is_after_erase(struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(block);
	return prev
		? block->phys_pos > phys_end(prev)
		: !!block->phys_pos;
}

bool
hed_block_is_after_insert(struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(block);
	return prev && hed_block_is_inserted(prev);
}

#ifndef BLOCKS_DEBUG
# define dump_blocks(file) {}
#else

static hed_uoff_t
block_phys_size(struct hed_file *file, struct file_block *block)
{
	struct file_block *next;

	if (hed_block_is_eof(block))
		return 0;
	next = next_block(block);
	return next->phys_pos - block->phys_pos;
}

static void
dump_block(int level, struct hed_file *file, struct hed_tree_node *node,
	   hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
{
	struct hed_block *block = tree_entry(node, struct hed_block, t);
	bool virtual = hed_block_is_virtual(block);
	unsigned char *p;
	hed_cursor_t *cur;
	char t[21] = "                    ";

	if (node->left)
		dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
	p = hed_block_data(block);
	if (level < 20) t[level] = '>'; else t[19] = '.';
	fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx"
		" {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
		t,
		(unsigned long long) *cur_offset,
		(unsigned long long) *cur_poffset,
		virtual ? 'v' : ' ',
		hed_block_is_inserted(block) ? 'i' : ' ',
		hed_block_is_dirty(block) ? '*' : ' ',
		(unsigned long long) node->size,
		(unsigned long long) block_phys_size(file, block),
		p && block->t.size > 0 ? p[0] : 0,
		p && block->t.size > 1 ? p[1] : 0,
		p && block->t.size > 2 ? p[2] : 0,
		p && block->t.size > 3 ? p[3] : 0,
		node, node->up,
		(unsigned long long) node->cover_size
		);
	list_for_each_entry (cur, &block->refs, list) {
		fprintf(stderr, "   <%p>: %llx->%p:%llx\n",
			cur, (long long)cur->pos,
			cur->block, (unsigned long long)cur->off);
	}
	assert(*cur_poffset == block->phys_pos);
	*cur_offset += node->size;
	*cur_poffset += block_phys_size(file, block);
	if (node->right)
		dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
	assert(node->cover_size == (node->left ? node->left->cover_size : 0)
		+ (node->right ? node->right->cover_size : 0)
		+ node->size);
}

/* Walk the tree manually here, because foreach_block() does not provide
 * the tree structure.
 * TODO: Change this if you plan to debug any other block containers.
 */
static void
dump_blocks(struct hed_file *file)
{
	struct file_block *first = first_block(file);
	hed_uoff_t cur_offset, cur_poffset;

	fprintf(stderr, "-- blocks dump --\n");
	cur_offset = 0;
	cur_poffset = first->phys_pos;
	dump_block(0, file, file_blocks(file)->root,
		   &cur_offset, &cur_poffset);
	fprintf(stderr, "-- blocks dump end --\n");
}
#endif

static void
get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	struct file_block *block;

	block = find_block(file_blocks(file), offset);
	assert(block != NULL);
	curs->pos = offset;
	curs->block = block;
	curs->off = offset - block_offset(block);
	list_add(&curs->list, &block->refs);

	BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
	       offset, offset - curs->off, curs->off, block->t.size);
}

void
hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	get_cursor(file, offset, curs);
}

static inline void
put_cursor(hed_cursor_t *curs)
{
	list_del(&curs->list);
}

void
hed_put_cursor(hed_cursor_t *curs)
{
	put_cursor(curs);
}

void
hed_update_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	put_cursor(curs);
	get_cursor(file, offset, curs);
}

void
hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	dst->pos = src->pos;
	dst->block = src->block;
	dst->off = src->off;
	list_add_tail(&dst->list, &src->block->refs);
}

void
hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	if (hed_is_a_cursor(dst))
		put_cursor(dst);
	hed_dup_cursor(src, dst);
}
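
/*
 * Usage sketch (not part of the original source): every live cursor sits
 * on some block's @refs list, which is how the block-manipulation code
 * below finds and relocates cursors when blocks split, merge, or die.
 * Acquisition and release must therefore always be paired:
 *
 *	hed_cursor_t curs;
 *	get_cursor(file, 0x1000, &curs);	(links into block->refs)
 *	...use curs.block and curs.off...
 *	put_cursor(&curs);			(unlinks from the refs list)
 */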

/* Move cursors from @old to @new, adding @off to their block
 * offsets to keep them at the same position. */
static void
update_cursors(const struct hed_block *old, struct hed_block *new,
	       hed_off_t off)
{
	hed_cursor_t *curs;

	BDEBUG("Updating cursors from <%p> to <%p>%c%llx\n",
	       old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry(curs, &old->refs, list) {
		curs->block = new;
		curs->off += off;
	}
}

/* Move cursors in the range <@start;@end> from @old to @new,
 * adding @off to their block offset, plus moving the reference list. */
static void
move_cursors(const struct hed_block *old, struct hed_block *new,
	     hed_uoff_t start, hed_uoff_t end, hed_off_t off)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>%c%llx\n",
	       old, start, end, new,
	       off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry_safe(curs, nextcurs, &old->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->block = new;
			curs->off += off;
			list_move(&curs->list, &new->refs);
		}
}

/* Move cursors in the range @block:<@start;@end> to @newpos */
static void
move_cursors_abs(const struct hed_block *block,
		 hed_uoff_t start, hed_uoff_t end,
		 const hed_cursor_t *newpos)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>:%llx\n",
	       block, start, end, newpos->block, newpos->off);

	list_for_each_entry_safe(curs, nextcurs, &block->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->pos = newpos->pos;
			curs->block = newpos->block;
			curs->off = newpos->off;
			list_move(&curs->list, &newpos->block->refs);
		}
}

/* Update the positions of cursors at and after @start for all
 * blocks starting at @block */
static void
slide_cursors(struct hed_file *file, const struct hed_block *block,
	      hed_uoff_t start, hed_off_t off)
{
	hed_cursor_t *curs;
	const struct hed_block *nblock;

	BDEBUG("Sliding cursors >= %llx by %c%llx, starting at <%p>\n",
	       start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
	nblock = block;
	do {
		block = nblock;
		list_for_each_entry(curs, &block->refs, list)
			if (curs->pos >= start)
				curs->pos += off;
		nblock = next_block(block);
	} while (!hed_block_is_eof(block));
}

static struct hed_block *
new_block(struct hed_file *file, long flags)
{
	struct file_block *new;

	if (! (new = swp_zalloc(file_swp(file), sizeof(struct file_block))) )
		return NULL;

	new->flags = flags;
	init_block_link(new);
	INIT_LIST_HEAD(&new->refs);
	if (flags & HED_BLOCK_EXCACHE)
		INIT_LIST_HEAD(&new->lru);
	else
		list_add_tail(&new->lru, &file->lru);

	return new;
}

static struct hed_block *
new_virt_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       long extraflags)
{
	struct hed_block *new =
		new_block(file, (HED_BLOCK_EXCACHE |
				 HED_BLOCK_VIRTUAL |
				 extraflags));
	if (!new)
		return NULL;

	new->t.size = size;
	new->phys_pos = pos;
	BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
	return new;
}

static struct hed_block *
new_data_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       struct hed_block_data *dataobj)
{
	struct hed_block *new =
		new_block(file, 0);
	if (!new)
		return NULL;

	cache_get(dataobj);
	new->dataobj = dataobj;
	new->t.size = size;
	new->phys_pos = pos;
	new->dataoff = FILE_BLOCK_OFF(pos);
	BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
	return new;
}

static void
file_free_block(struct hed_file *file, struct file_block *block)
{
	if (block->dataobj)
		cache_put(file->cache, block->dataobj);
	list_del(&block->lru);

	swp_free(file_swp(file), block);
}

static bool
kill_block_if_empty(struct hed_file *file, struct file_block *block)
{
	if (!hed_block_is_eof(block) && block->t.size == 0 &&
	    list_empty(&block->refs)) {
		/* No recalculation needed, zero size. */
		unchain_block(file_blocks(file), block);
		file_free_block(file, block);
		return true;
	}
	return false;
}

/* This may kill the previous block as well, if it can be merged
 * with the next one. It will never kill anything which _follows_. */
static void
file_kill_block(struct hed_file *file, struct file_block *block)
{
	hed_uoff_t phys_pos = block->phys_pos;
	struct file_block *prev = prev_block(block);
	struct file_block *next = next_block(block);
	struct file_block *merger;
	bool killprev = false;

	/* We should never kill a dirty block! */
	assert(!hed_block_is_dirty(block));
	/* We shouldn't get here with an empty block (that might
	 * need special considerations with virtualization). */
	assert(block->t.size > 0);

	if (!hed_block_is_eof(block) &&
	    hed_block_is_inner_virtual(next) &&
	    phys_pos + block->t.size == next->phys_pos) {
		if (!hed_block_is_eof(prev) &&
		    hed_block_is_inner_virtual(prev) &&
		    prev->phys_pos + prev->t.size == phys_pos)
			killprev = true;
		merger = next;
		merger->phys_pos -= block->t.size;
		update_cursors(merger, merger, block->t.size);
		update_cursors(block, merger, 0);
	} else if (!hed_block_is_eof(prev) &&
		   hed_block_is_inner_virtual(prev) &&
		   prev->phys_pos + prev->t.size == phys_pos) {
		merger = prev;
		update_cursors(block, merger, merger->t.size);
	} else if (!hed_block_is_virtual(block)) {
		/* Convert physical to virtual */
		assert(block->dataobj);
		cache_put(file->cache, block->dataobj);
		block->dataobj = NULL;

		list_del_init(&block->lru); /* unlink the block from LRU */
		hed_block_set_excache(block); /* say it's unlinked */
		hed_block_set_virtual(block);
		return;
	} else
		/* Already virtual and cannot merge */
		return;

	list_splice(&block->refs, &merger->refs);

	/* Messing with block sizes and unchaining is a bit tricky
	 * since unchain_block() can splay(). So we really need
	 * to recalc_block_recursive() right after we update the size.
	 * If this place turns out to be a hot-spot, we can optimize
	 * the tree operations here. */
	merger->t.size += block->t.size;
	recalc_block_recursive(merger);

	/* Destroy the block */
	recalc_unchain_block(file_blocks(file), block);
	file_free_block(file, block);

	if (killprev)
		file_kill_block(file, prev);
}

static struct file_block *
split_block(struct hed_file *file, struct hed_block *block,
	    hed_uoff_t splitpoint)
{
	struct file_block *next;

	next = new_block(file, block->flags);
	if (!next)
		return NULL;

	if ( (next->dataobj = block->dataobj) ) {
		cache_get(next->dataobj);
		next->dataoff = block->dataoff + splitpoint;
	} else
		assert(hed_block_is_virtual(block));

	next->t.size = block->t.size - splitpoint;
	next->phys_pos = block->phys_pos;
	if (!hed_block_is_inserted(block))
		next->phys_pos += splitpoint;

	block->t.size = splitpoint;
	recalc_block_recursive(block);
	recalc_chain_block_before(file_blocks(file), next, next_block(block));

	move_cursors(block, next, splitpoint, UOFF_MAX, -splitpoint);

	return next;
}
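
/*
 * Worked example (not part of the original source): splitting a 0x30-byte
 * block at splitpoint 0x10 yields
 *
 *	before:	block { size 0x30, phys_pos P, dataoff D }
 *	after:	block { size 0x10, phys_pos P, dataoff D }
 *		next  { size 0x20, phys_pos P+0x10, dataoff D+0x10 }
 *
 * (for an inserted block, next->phys_pos stays at P), and all cursors with
 * off >= 0x10 are re-homed onto @next with their offset reduced by 0x10.
 */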

/* Replace a chunk in @block with @newblock */
static int
replace_chunk(struct hed_file *file, struct hed_block *block,
	      hed_uoff_t offset, struct hed_block *newblock)
{
	size_t len = newblock->t.size;
	hed_uoff_t leadlen = offset + len;

	assert(offset < block->t.size);
	assert(len <= block->t.size - offset);

	/* Re-create the head block if necessary */
	if (offset) {
		struct file_block *head;

		head = new_block(file, block->flags & ~HED_BLOCK_EOF);
		if (!head)
			return -1;
		head->t.size = offset;
		head->dataobj = block->dataobj;
		head->dataoff = block->dataoff;
		head->phys_pos = block->phys_pos;

		recalc_chain_block_before(hed_file_blocks(file),
					  head, block);

		/* Move cursors to the head */
		move_cursors(block, head, 0, offset - 1, 0);
	}

	/* Move pointers to the new block */
	assert(leadlen > 0);
	move_cursors(block, newblock, offset, leadlen - 1, -offset);

	/* Shorten the tail block */
	block->t.size -= leadlen;
	block->dataoff += leadlen;
	assert(!hed_block_is_inserted(block));
	block->phys_pos += leadlen;
	recalc_block_recursive(block);
	move_cursors(block, block, leadlen, UOFF_MAX, -leadlen);

	/* Insert the new block */
	recalc_chain_block_before(file_blocks(file), newblock, block);

	/* Kill the trailing block if possible */
	kill_block_if_empty(file, block);

	return 0;
}
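
/*
 * Worked example (not part of the original source): replacing 0x10 bytes
 * at offset 0x20 of a 0x100-byte block B with @newblock N gives
 *
 *	head { bytes 0x00..0x1f of B }	N { 0x10 bytes }  B' { 0x30..0xff }
 *
 * i.e. B is shortened in place to become the tail B' (size shrinks and
 * dataoff/phys_pos advance by leadlen == offset + len == 0x30), a fresh
 * head block is chained in front only when offset != 0, and N is chained
 * between them. If the replacement reaches the end of B, the tail shrinks
 * to zero bytes and kill_block_if_empty() reclaims it.
 */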

#ifdef HED_CONFIG_SWAP

static char *
swp_filename(const char *filename)
{
	size_t fnlen = strlen(filename);
	char *swp;
	char *file;

	if (!(swp = malloc(fnlen + 9)) )
		return NULL;
	strcpy(swp, filename);

	file = strrchr(swp, '/');
	file = file ? file + 1 : swp;
	*file = '.';
	strcpy(stpcpy(file + 1, filename + (file - swp)), ".hedswp");
	return swp;
}

static char *
newswp_filename(char *swpname)
{
	char *ret, *p;

	ret = swpname;
	while (!access(ret, F_OK)) {
		if (ret == swpname) {
			if (! (ret = strdup(swpname)) )
				return NULL;
			p = ret + strlen(ret) - 1;
		}

		if (*p == 'a') {
			free(ret);
			return NULL;
		}
		--*p;
	}
	return ret;
}
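
/*
 * Example (not part of the original source): for "/path/to/foo",
 * swp_filename() yields "/path/to/.foo.hedswp" -- a leading dot plus the
 * ".hedswp" suffix and the NUL account for the fnlen + 9 allocation. If
 * that name is already taken, newswp_filename() keeps decrementing the
 * final character ("...hedswp" -> "...hedswo" -> ...), giving up once it
 * would step below 'a'.
 */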

int
hed_file_remove_swap(struct hed_file *file)
{
	if (remove(file->swpname))
		return -1;
	if (rename(file->newswpname, file->swpname))
		return -1;

	free(file->newswpname);
	file->newswpname = file->swpname;
	return 0;
}

static inline struct hed_file *
file_swp_init(const char *name)
{
	char *swpname, *newswpname;
	struct swp_file *swp;
	struct hed_file *file;

	swpname = swp_filename(name);
	if (!swpname)
		goto fail;
	newswpname = newswp_filename(swpname);
	if (!newswpname)
		goto fail_free_name;
	swp = swp_init_write(newswpname);
	if (!swp)
		goto fail_free_newname;

	assert(sizeof(struct swp_header) + sizeof(struct hed_file)
	       <= FILE_BLOCK_SIZE);
	file = swp_private(swp);
	memset(file, 0, sizeof *file);

	file->swp = swp;
	file->swpname = swpname;
	file->newswpname = newswpname;

	return file;

fail_free_newname:
	free(newswpname);
fail_free_name:
	if (swpname != newswpname)
		free(swpname);
fail:
	return NULL;
}

static inline void
file_swp_done(struct hed_file *file)
{
	remove(file->newswpname);
	if (file->newswpname != file->swpname)
		free(file->newswpname);
	free(file->swpname);
	swp_done(file_swp(file));
	/* file was de-allocated automatically with file->swp */
}

#else /* HED_CONFIG_SWAP */

static inline struct hed_file *
file_swp_init(const char *name)
{
	return calloc(1, sizeof(struct hed_file));
}

static inline void
file_swp_done(struct hed_file *file)
{
}

#endif /* HED_CONFIG_SWAP */

static inline struct stat *
file_stat(struct hed_file *file)
{
	return &file->s;
}

int
hed_file_update_size(struct hed_file *file)
{
	hed_uoff_t oldsize = file->phys_size;

	if (fstat(file->fd, file_stat(file)) < 0)
		return -1;

	if (S_ISBLK(file_stat(file)->st_mode)) {
		if (ioctl(file->fd, BLKGETSIZE64, &file->phys_size)) {
			unsigned long size_in_blocks;
			if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
				return -1;
			file->phys_size = (hed_uoff_t)size_in_blocks << 9;
		}
	} else if (S_ISREG(file_stat(file)->st_mode)) {
		file->phys_size = file_stat(file)->st_size;
	} else if (S_ISCHR(file_stat(file)->st_mode)) {
		if (lseek(file->fd, 0, SEEK_SET) < 0)
			return -1;
		file->phys_size = (hed_uoff_t)OFF_MAX + 1;
	} else {
		errno = EINVAL;
		return -1;
	}

	file->size += file->phys_size - oldsize;
	return 0;
}
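
/*
 * Background sketch (not part of the original source): BLKGETSIZE64
 * reports the device size in bytes into a 64-bit value, while the older
 * BLKGETSIZE reports it in 512-byte sectors into an unsigned long; hence
 * the << 9 in the fallback path above. The same probe in isolation:
 *
 *	unsigned long long bytes;
 *	unsigned long sectors;
 *	if (ioctl(fd, BLKGETSIZE64, &bytes) == 0)
 *		printf("%llu bytes\n", bytes);
 *	else if (ioctl(fd, BLKGETSIZE, &sectors) == 0)
 *		printf("%llu bytes\n", (unsigned long long)sectors << 9);
 */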

static int
do_file_open(struct hed_file *file)
{
	file->fd = open(file->name, O_RDONLY);
	if (file->fd < 0) {
		if (errno != ENOENT)
			return errno;
		fprintf(stderr, "Warning: File %s does not exist\n",
			file->name);
		memset(file_stat(file), 0, sizeof(struct stat));

	} else if (hed_file_update_size(file)) {
		int errsv = errno;
		close(file->fd);
		return errsv;
	}
	return 0;
}

static int
file_setup_blocks(struct hed_file *file)
{
	hed_uoff_t phys_size = file->phys_size;
	struct file_block *block;

	block = &file->eof_block;
	block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL | HED_BLOCK_EOF;
	INIT_LIST_HEAD(&block->lru);
	INIT_LIST_HEAD(&block->refs);
	block->t.size = OFF_MAX - phys_size + 1;
	block->phys_pos = phys_size;

	init_block_list(file_blocks(file), block);

	if (phys_size) {
		block = new_virt_block(file, 0, phys_size, 0);
		if (!block)
			return -1;
		recalc_chain_block_before(file_blocks(file), block,
					  &file->eof_block);
	}

	return 0;
}

int
libhed_init(void)
{
	return file_access_init();
}

struct hed_file *
hed_open(const char *name)
{
	struct hed_file *file;

	if (! (file = file_swp_init(name)) )
		goto fail;

	file->name = name;

	file->cache = cache_init(CACHE_LENGTH, file_swp(file));
	if (!file->cache)
		goto fail_file;
	INIT_LIST_HEAD(&file->lru);

	if (do_file_open(file))
		goto fail_file;

	if (file_setup_blocks(file))
		goto fail_file;

	fixup_register(file);

	return file;

fail_file:
	hed_close(file);
fail:
	return NULL;
}

void
hed_close(struct hed_file *file)
{
	assert(file);

	/* Do not check for errors:
	 * 1. This FD is read-only => no data loss possible
	 * 2. We're about to exit anyway => no resource leak
	 */
	if (file->fd >= 0)
		close(file->fd);

	fixup_deregister(file);

	/* No need to free file blocks here, because all data is
	 * allocated either from the cache or from the swap file
	 * and both are going to be destroyed now.
	 */
	if (file->cache)
		cache_done(file->cache);

	file_swp_done(file);
}
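
/*
 * Usage sketch (not part of the original source): a minimal read-only
 * consumer of the public API defined in this file, assuming libhed_init()
 * succeeded and "path" names an existing file. hed_file_cpin() returns
 * the number of bytes it could NOT read, so zero means success:
 *
 *	struct hed_file *f = hed_open(path);
 *	hed_cursor_t curs;
 *	unsigned char buf[16];
 *	hed_get_cursor(f, 0, &curs);
 *	if (!hed_file_cpin(f, buf, sizeof(buf), &curs))
 *		...the first 16 bytes are now in buf...
 *	hed_put_cursor(&curs);
 *	hed_close(f);
 */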

/* Adjust a cursor after off gets outside its block */
static void
fixup_cursor_slow(hed_cursor_t *curs)
{
	struct file_block *block = curs->block;
	hed_uoff_t off = curs->off;

	do {
		if ((hed_off_t)off < 0) {
			block = prev_block(block);
			off += block->t.size;
		} else {
			off -= block->t.size;
			block = next_block(block);
		}
	} while (off >= block->t.size);

	curs->block = block;
	curs->off = off;
	list_move(&curs->list, &block->refs);
}

/* Adjust a cursor if off gets outside its block.
 * This is separate from fixup_cursor_slow, because it is supposed
 * to be small enough to be inlined (which is a win, because most of
 * the time no fixup has to be done, so the fast inlined path is used).
 */
static inline void
fixup_cursor(hed_cursor_t *curs)
{
	if (curs->off >= curs->block->t.size)
		fixup_cursor_slow(curs);
}

hed_off_t
hed_move_relative(hed_cursor_t *curs, hed_off_t num)
{
	hed_off_t newpos = curs->pos + num;
	if (newpos < 0) {
		newpos = num < 0 ? 0 : OFF_MAX;
		num = newpos - curs->pos;
	}
	curs->pos = newpos;
	curs->off += num;
	fixup_cursor(curs);
	return num;
}

/* Relative move with no checking (and only by a small amount) */
static inline void
move_rel_fast(hed_cursor_t *curs, ssize_t num)
{
	curs->off += num;
	curs->pos += num;
	fixup_cursor(curs);
}

static void
alloc_caches(struct hed_file *file, struct hed_block_data **caches, int n)
{
	struct remap_control rc;
	int i;

	BDEBUG("Allocate %d caches (%d free slots available)\n",
	       n, file->cache->nfree);

	assert(n <= CACHE_LENGTH);
	while (file->cache->nfree < n) {
		struct file_block *block;

		assert(!list_empty(&file->lru));
		block = list_entry(file->lru.next, struct file_block, lru);
		BDEBUG("Killing block at physical %llx\n", block->phys_pos);
		file_kill_block(file, block);
	}

	for (i = 0; i < n; ++i) {
		caches[i] = cache_alloc(file->cache);
		assert(caches[i]);
	}

	remap_init(&rc);
	remap_compact(&rc, file->cache, caches, n);
	for (i = 0; i < n; ++i)
		remap_add(&rc, caches[i]->data);
	remap_finish(&rc);
}

static inline void
free_caches(struct hed_file *file, struct hed_block_data **preload, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		if (preload[i])
			cache_put(file->cache, preload[i]);
}

static int
file_load_data(struct hed_file *file,
	       struct hed_block_data **caches, int n,
	       hed_uoff_t offset)
{
	struct hed_block_data *dataobj = caches[0];
	void *data = dataobj->data;
	ssize_t rsize, total, segsize;

	segsize = n << FILE_BLOCK_SHIFT;
	for (total = 0; total < segsize; total += rsize) {
		rsize = pread(file->fd, data + total,
			      segsize - total, offset + total);
		if (!rsize)
			break;
		if (rsize < 0) {
			dataobj = caches[total >> FILE_BLOCK_SHIFT];
			caches[total >> FILE_BLOCK_SHIFT] = NULL;
			data = dataobj->data;
			cache_put(file->cache, dataobj);
			total = FILE_BLOCK_ROUND(total);
			rsize = FILE_BLOCK_SIZE;
			BDEBUG("Error reading block at phys %llx: %s\n",
			       offset + total, strerror(errno));
		}
	}

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}

#ifdef HED_CONFIG_MMAP

static int
file_load_data_mmap(struct hed_file *file,
		    struct hed_block_data **caches, int n,
		    hed_uoff_t offset)
{
	void *data;
	ssize_t segsize;
	int i;

	segsize = n << FILE_BLOCK_SHIFT;
	data = mmap(NULL, segsize,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
		    file->fd, offset);

	if (data == MAP_FAILED) {
		BDEBUG("mmap failed at %llx: fail over to traditional read\n",
		       offset);

		data = mmap(NULL, segsize,
			    PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS,
			    0, 0);
		if (data == MAP_FAILED)
			return -1;

		for (i = 0; i < n; ++i)
			caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
		return file_load_data(file, caches, n, offset);
	}

	for (i = 0; i < n; ++i)
		caches[i]->data = data + (i << FILE_BLOCK_SHIFT);

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}
# define file_load_data file_load_data_mmap

#endif

/* Find the block with the lowest physical position that intersects
 * the loaded segment. The search starts at @block.
 */
static struct hed_block *
first_load_block(struct hed_tree *tree, struct hed_block *block,
		 hed_uoff_t segstart)
{
	struct hed_block *prev = block;
	do {
		block = prev;
		prev = prev_block(prev);
	} while (!hed_block_is_eof(prev) && phys_end(prev) > segstart);
	return block;
}

static int
load_blocks(struct hed_file *file, const blockoff_t *from)
{
	hed_uoff_t physpos, segstart;
	struct hed_block_data *preload[FILE_READAHEAD];
	size_t ra_bkw, ra_fwd, ra_off;
	hed_cursor_t pos;
	int nblocks;

	segstart = hed_cursor_phys_pos(from);
	ra_bkw = FILE_BLOCK_OFF(segstart);
	ra_fwd = FILE_BLOCK_SIZE - ra_bkw;

	if (file_ra_forward(file))
		ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
	else if (file_ra_backward(file))
		ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;

	if (ra_bkw > segstart)
		ra_bkw = segstart;
	if (ra_fwd > file->phys_size - segstart)
		ra_fwd = file->phys_size - segstart;

	segstart -= ra_bkw;
	ra_fwd += ra_bkw;
	pos.block = first_load_block(file_blocks(file), from->block, segstart);
	pos.off = segstart >= pos.block->phys_pos
		? segstart - pos.block->phys_pos
		: 0;

	list_add(&pos.list, &pos.block->refs);
	nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
	alloc_caches(file, preload, nblocks);
	put_cursor(&pos);

	if (file_load_data(file, preload, nblocks, segstart)) {
		free_caches(file, preload, nblocks);
		return -1;
	}

	while (physpos = hed_cursor_phys_pos(&pos),
	       ra_off = physpos - segstart,
	       ra_off < ra_fwd) {
		struct hed_block_data *dataobj;
		struct hed_block *newblock;
		size_t datalen;

		if (!hed_block_is_virtual(pos.block)) {
			pos.block = next_block(pos.block);
			pos.off = 0;
			continue;
		}

		datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
		if (datalen > hed_block_size(pos.block) - pos.off)
			datalen = hed_block_size(pos.block) - pos.off;

		dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
		newblock = dataobj
			? new_data_block(file, physpos, datalen, dataobj)
			: new_virt_block(file, physpos, datalen,
					 HED_BLOCK_BAD);

		/* Punch the new block */
		BDEBUG("Add %s block at %llx, length %llx\n",
		       hed_block_is_virtual(newblock) ? "error" : "physical",
		       newblock->phys_pos, newblock->t.size);
		if (replace_chunk(file, pos.block, pos.off, newblock)) {
			file_free_block(file, newblock);
			free_caches(file, preload, nblocks);
			return -1;
		}

		pos.block = next_block(newblock);
		pos.off = 0;
	}

	/* All cache objects now have an extra reference from the
	 * allocation. Drop it. */
	free_caches(file, preload, nblocks);

	dump_blocks(file);
	return 0;
}
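
/*
 * Worked example (not part of the original source), with FILE_BLOCK_SIZE
 * 4096 and no readahead configured: a request at physical offset 0x1234
 * gives ra_bkw = 0x234 (distance back to the block boundary) and
 * ra_fwd = 0xdcc (distance to the next boundary), so segstart becomes
 * 0x1000 and ra_fwd grows to 0x1000 -- exactly the enclosing 4 KiB block,
 * loaded as nblocks = 1 cache page. Forward readahead extends ra_fwd by
 * (FILE_READAHEAD - 1) further blocks, backward readahead extends ra_bkw
 * likewise, and both are clamped to the file boundaries.
 */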

/* Shorten a block at beginning and enlarge the preceding block.
 *
 * Re-allocate at most @len bytes from the beginning of @block to the
 * end of the preceding block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_begin(struct hed_file *file, struct file_block *block,
		size_t len, long state)
{
	struct file_block *prev = prev_block(block);
	size_t maxgrow;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The previous block must exist: */
	if (hed_block_is_eof(prev))
		return 0;

	/* The block flags must match the requested @state: */
	if ((prev->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (prev->phys_pos + prev->t.size != block->phys_pos)
		return 0;

	/* Append fewer bytes than requested if not all are available */
	assert(prev->t.size <= prev->dataobj->size);
	maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
	if (len > maxgrow)
		len = maxgrow;
	if (!len)
		return 0;

	BDEBUG("Appending 0:%lx to the previous block\n", len);

	/* Move cursors away from the to-be-chopped beginning */
	move_cursors(block, prev, 0, len - 1, prev->t.size);

	/* Enlarge the previous block */
	prev->t.size += len;
	recalc_block_recursive(prev);

	/* Shorten the original block */
	block->t.size -= len;
	block->dataoff += len;
	block->phys_pos += len;
	recalc_block_recursive(block);
	return len;
}

/* Shorten a block at end and enlarge the following block.
 *
 * Re-allocate at most @len bytes from the end of @block to the
 * beginning of the following block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_end(struct hed_file *file, struct file_block *block,
	      size_t len, long state)
{
	struct file_block *next = next_block(block);
	hed_uoff_t off;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The next block must exist: */
	if (hed_block_is_eof(block))
		return 0;

	/* The block flags must match the requested @state: */
	if ((next->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (block->phys_pos + block->t.size != next->phys_pos)
		return 0;

	/* Prepend fewer bytes than requested if not all are available */
	if (len > next->dataoff)
		len = next->dataoff;
	if (!len)
		return 0;
	off = block->t.size - len;

	BDEBUG("Prepending %llx:%lx to the next block\n", off, len);

	/* Shift cursors in the new physical block */
	update_cursors(next, next, len);

	/* Move cursors away from the to-be-chopped end */
	move_cursors(block, next, off, UOFF_MAX, -off);

	/* Enlarge the next block */
	next->dataoff -= len;
	next->phys_pos -= len;
	next->t.size += len;
	recalc_block_recursive(next);

	/* Shorten the original block */
	block->t.size -= len;
	recalc_block_recursive(block);
	return len;
}

/* Search for an existing data object within the same physical block
 * as @curs, and having the given @state flags.
 */
static struct hed_block_data *
search_data(struct hed_file *file, const hed_cursor_t *curs, long state)
{
	struct file_block *block;
	hed_uoff_t physpos;

	physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
	BDEBUG("Search for already loaded data at %llx starting at %llx...",
	       physpos, curs->block->phys_pos);

	/* Search backwards */
	block = curs->block;
	while (!hed_block_is_eof(block = prev_block(block))) {
		if (block->phys_pos < physpos)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	/* Search forwards */
	block = curs->block;
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (block->phys_pos >= physpos + FILE_BLOCK_SIZE)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	BDEBUG(" not found\n");
	return NULL;
}

static int
reuse_loaded_data(struct hed_file *file, const blockoff_t *blockoff,
		  struct hed_block_data *data)
{
	struct file_block *physblock;
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t physpos = block->phys_pos + block_offset;
	size_t part = FILE_BLOCK_OFF(physpos);
	size_t len =
		FILE_BLOCK_SIZE - part <= block->t.size - block_offset
		? FILE_BLOCK_SIZE - part
		: block->t.size - block_offset;

	if (part > block_offset)
		part = block_offset;
	physpos -= part;
	len += part;
	block_offset -= part;

	if (! (physblock = new_data_block(file, physpos, len, data)) )
		return -1;

	BDEBUG("Add physical block at %llx, length %llx\n",
	       physblock->phys_pos, physblock->t.size);
	if (replace_chunk(file, block, block_offset, physblock)) {
		file_free_block(file, physblock);
		return -1;
	}

	dump_blocks(file);
	return 0;
}

/* Replace a part of a virtual block with content loaded
 * from disk. The amount of data loaded from the disk depends
 * on various factors with the goal to choose the most efficient
 * ratio. The only guarantee is that the byte at @blockoff will
 * be in a non-virtual block when this function returns 0.
 */
static int
devirtualize_clean(struct hed_file *file, const blockoff_t *blockoff)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block_data *data;

	BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
	       block_offset(block), block->t.size);
	assert(hed_block_is_virtual(block));

	/* Check if we can combine with a neighbouring block */
	if (shrink_at_begin(file, block, SIZE_MAX, 0) > block_offset ||
	    shrink_at_end(file, block, SIZE_MAX, 0) >= remain) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Check if the block is already loaded elsewhere */
	data = search_data(file, blockoff, 0);
	return data
		? reuse_loaded_data(file, blockoff, data)
		: load_blocks(file, blockoff);
}

/* Replace at most @len bytes of a virtual block with a newly
 * allocated out-of-cache block. The block is marked dirty
 * and its data is left uninitialized.
 * If the block at @blockoff is not virtual, make it dirty.
 * Note that this function may devirtualize less than @len bytes.
 * In the worst case only 1 byte at @blockoff will be available.
 */
static int
prepare_modify(struct hed_file *file, blockoff_t *blockoff, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct file_block *newblock;

	if (hed_block_is_dirty(block))
		return 0;

	if (len > remain)
		len = remain;

	BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
	       block_offset, len,
	       block_offset(block), block->t.size);

	/* Check if we can combine with a neighbouring block */
	if ((block_offset == 0 &&
	     shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
	    (remain == len &&
	     shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Initialize a new block */
	newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
	if (!newblock)
		goto out_err;

	/* Allocate data */
	if ( (newblock->dataobj = search_data(file, blockoff,
					      HED_BLOCK_DIRTY)) )
		cache_get(newblock->dataobj);
	else if (! (newblock->dataobj = block_data_new(file->cache,
						       FILE_BLOCK_SIZE)) )
		goto out_err_free;

	newblock->phys_pos = block->phys_pos + block_offset;
	newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
	if (len > FILE_BLOCK_SIZE - newblock->dataoff)
		len = FILE_BLOCK_SIZE - newblock->dataoff;
	newblock->t.size = len;

	if (replace_chunk(file, block, block_offset, newblock))
		goto out_err_free;

	dump_blocks(file);
	return 0;

out_err_free:
	file_free_block(file, newblock);
out_err:
	return -1;
}

/* Ensure that @curs points to an up-to-date non-virtual block.
 * Load the data from disk if necessary, return zero on failure. */
size_t
hed_prepare_read(struct hed_file *file, const hed_cursor_t *curs, size_t len)
{
	struct file_block *block = curs->block;
	if (hed_block_is_inner_virtual(block) &&
	    devirtualize_clean(file, curs) < 0)
		return 0;

	return hed_cursor_chunk_len(curs, len);
}

/* Move the block pointer to the next block */
static struct hed_block *
cursor_next_block(hed_cursor_t *curs)
{
	struct hed_block *block = next_nonzero_block(curs->block);

	if (block) {
		curs->block = block;
		curs->off = 0;
		list_move(&curs->list, &block->refs);
	}
	return block;
}

/* This is an optimized way of doing:
 *
 *	hed_move_relative(curs, curs->block->t.size);
 *
 * for the case when curs->off == 0.
 */
static struct hed_block *
move_to_next(hed_cursor_t *curs)
{
	curs->pos += hed_block_size(curs->block);
	return cursor_next_block(curs);
}

/* Copy in @count bytes from @pos.
 * Returns the number of bytes that were not read (i.e. zero on success).
 * The @pos blockoff is moved by the amount of data read.
 * CAUTION: If you read up to MAX_BLOCKOFF, then @pos points one byte
 * beyond the EOF block upon return.
 */
static size_t
copy_in(struct hed_file *file, void *buf, size_t count, blockoff_t *pos)
{
	size_t cpylen;

	pos->pos += count;
	while (count && (cpylen = hed_prepare_read(file, pos, count))) {
		if (hed_block_is_virtual(pos->block))
			memset(buf, 0, cpylen);
		else
			memcpy(buf, hed_cursor_data(pos), cpylen);

		buf += cpylen;
		count -= cpylen;
		if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
			if (!cursor_next_block(pos))
				break;
	}
	pos->pos -= count;
	return count;
}

size_t
hed_file_cpin(struct hed_file *file, void *buf, size_t count,
	      const hed_cursor_t *pos)
{
	blockoff_t mypos;
	size_t ret;

	hed_dup_cursor(pos, &mypos);
	ret = copy_in(file, buf, count, &mypos);
	put_cursor(&mypos);
	return ret;
}

/* Set the modified flag */
static inline void
set_modified(struct hed_file *file)
{
	file->modified = true;
}

/* Clear the modified flag */
static inline void
clear_modified(struct hed_file *file)
{
	file->modified = false;
}

int
hed_file_set_byte(struct hed_file *file, blockoff_t *blockoff,
		  unsigned char byte)
{
	hed_uoff_t offset = blockoff->pos;

	if (prepare_modify(file, blockoff, 1))
		return -1;
	set_modified(file);

	if (offset >= file->size)
		file->size = offset + 1;

	hed_block_data(blockoff->block)[blockoff->off] = byte;
	return 0;
}

size_t
hed_file_set_block(struct hed_file *file, blockoff_t *blockoff,
		   unsigned char *buf, size_t len)
{
	while (len) {
		size_t span;

		if (prepare_modify(file, blockoff, len))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(blockoff, len);
		memcpy(hed_cursor_data(blockoff), buf, span);
		buf += span;
		len -= span;
		move_rel_fast(blockoff, span);
	}
	if (blockoff->pos > file->size)
		file->size = blockoff->pos;

	return len;
}

hed_uoff_t
hed_file_set_bytes(struct hed_file *file, blockoff_t *blockoff,
		   unsigned char byte, hed_uoff_t rep)
{
	while (rep) {
		size_t span;

		if (prepare_modify(file, blockoff, rep))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(blockoff, rep);
		memset(hed_cursor_data(blockoff), byte, span);
		rep -= span;
		move_rel_fast(blockoff, span);
	}
	if (blockoff->pos > file->size)
		file->size = blockoff->pos;

	return rep;
}
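
/*
 * Usage sketch (not part of the original source): overwriting bytes and
 * flushing them back to disk with the API above. hed_file_set_block()
 * returns the number of bytes it could NOT write, so zero means success:
 *
 *	hed_cursor_t curs;
 *	unsigned char patch[4] = { 0xde, 0xad, 0xbe, 0xef };
 *	hed_get_cursor(f, 0x40, &curs);
 *	if (!hed_file_set_block(f, &curs, patch, sizeof(patch)))
 *		hed_file_commit(f);
 *	hed_put_cursor(&curs);
 */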

static int
file_erase_continuous(struct hed_file *file, blockoff_t *blockoff, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;

	/* Find the new position */
	hed_move_relative(blockoff, len);

	/* Move all other cursors in the erased range to the new position */
	assert(len > 0);
	move_cursors_abs(block, block_offset,
			 block_offset + len - 1, blockoff);

	if (!block_offset) {
		block->dataoff += len;
		if (!hed_block_is_inserted(block))
			block->phys_pos += len;
	} else if (block_offset + len < block->t.size &&
		   !split_block(file, block, block_offset + len))
		return -1;

	move_cursors(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);

	block->t.size -= len;
	recalc_block_recursive(block);

	kill_block_if_empty(file, block);
	return 0;
}

size_t
hed_file_erase_block(struct hed_file *file, blockoff_t *blockoff,
		     hed_uoff_t len)
{
	hed_uoff_t offset;
	hed_uoff_t todo;

	offset = blockoff->pos;
	if (offset > file_size(file))
		return 0;
	else if (len > file_size(file) - offset)
		len = file_size(file) - offset;

	todo = len;
	while (todo) {
		size_t span = hed_cursor_chunk_len(blockoff, todo);

		if (file_erase_continuous(file, blockoff, span))
			break;

		todo -= span;
	}
	len -= todo;

	file->size -= len;
	set_modified(file);

	file->eof_block.t.size += len;
	recalc_block_recursive(&file->eof_block);

	struct hed_block *slideblock = prev_block(blockoff->block);
	if (hed_block_is_eof(slideblock))
		slideblock = blockoff->block;
	slide_cursors(file, slideblock, blockoff->pos, -len);

	return todo;
}

int
hed_file_insert_begin(struct hed_file *file, const hed_cursor_t *curs,
		      hed_cursor_t *curs_ins)
{
	struct file_block *block, *newblock;

	BDEBUG("Starting insert at %llx\n", curs->pos);

	newblock = new_block(file,
			     HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
	if (!newblock)
		return -1;

	newblock->phys_pos = hed_cursor_phys_pos(curs);
	newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
	if (!newblock->dataobj) {
		file_free_block(file, newblock);
		return -1;
	}

	block = curs->block;
	if (curs->off) {
		block = split_block(file, block, curs->off);
		if (!block) {
			file_free_block(file, newblock);
			return -1;
		}
	}

	chain_block_before(file_blocks(file), newblock, block);

	curs_ins->pos = curs->pos;
	curs_ins->off = newblock->t.size;
	curs_ins->block = newblock;
	list_add(&curs_ins->list, &newblock->refs);

	dump_blocks(file);
	return 0;
}

void
hed_file_insert_end(struct hed_file *file, blockoff_t *blockoff_ins)
{
	struct file_block *block = blockoff_ins->block;

	BDEBUG("End insert at %llx\n", blockoff_ins->pos);

	blockoff_ins->block = NULL;
	list_del(&blockoff_ins->list);
	if (!kill_block_if_empty(file, block))
		block_data_shrink(file->cache, block->dataobj,
				  block->dataoff + block->t.size);

	dump_blocks(file);
}

static void
insert_block(struct hed_file *file, blockoff_t *blockoff,
	     unsigned char *buf, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t offset = blockoff->pos;

	assert(file && offset >= 0);

	assert(hed_block_is_excache(block));
	hed_block_set_dirty(block);
	set_modified(file);

	memcpy(hed_block_data(block) + blockoff->off, buf, len);
	block->t.size += len;
	recalc_block_recursive(block);
	blockoff->off += len;
	blockoff->pos += len;

	if (blockoff->pos > file->size)
		file->size = blockoff->pos;
	else
		file->size += len;

	slide_cursors(file, next_block(block), offset, len);
}

size_t
hed_file_insert_block(struct hed_file *file, blockoff_t *blockoff,
		      unsigned char *buf, size_t len)
{
	while (len) {
		struct file_block *block = blockoff->block;
		size_t remain = block->dataobj->size - blockoff->off;

		if (!remain) {
			list_del(&blockoff->list);
			blockoff->block = next_block(block);
			blockoff->off = 0;

			if (!hed_file_insert_begin(file, blockoff, blockoff))
				continue;

			blockoff->block = block;
			blockoff->off = block->t.size;
			list_add(&blockoff->list, &block->refs);
			break;
		}

		if (remain > len)
			remain = len;

		BDEBUG("Append %ld bytes to the insert block\n",
		       (long) remain);
		insert_block(file, blockoff, buf, remain);
		buf += remain;
		len -= remain;
	}
	return len;
}

size_t
hed_file_insert_byte(struct hed_file *file, blockoff_t *blockoff,
		     unsigned char byte)
{
	return hed_file_insert_block(file, blockoff, &byte, 1);
}

size_t
hed_file_insert_once(struct hed_file *file, blockoff_t *blockoff,
		     unsigned char *buf, size_t len)
{
	blockoff_t insert;

	if (!hed_file_insert_begin(file, blockoff, &insert)) {
		len = hed_file_insert_block(file, &insert, buf, len);
		hed_file_insert_end(file, &insert);
	}
	return len;
}
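
/*
 * Usage sketch (not part of the original source): hed_file_insert_once()
 * wraps the whole insert session shown above -- begin, copy, end -- and
 * returns the number of bytes it could NOT insert:
 *
 *	hed_cursor_t curs;
 *	unsigned char hdr[2] = { 'h', 'x' };
 *	hed_get_cursor(f, 0, &curs);
 *	if (!hed_file_insert_once(f, &curs, hdr, sizeof(hdr)))
 *		...two bytes now precede the old content...
 *	hed_put_cursor(&curs);
 */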

struct commit_control {
	struct hed_file *file;
	int wfd;		/* file descriptor for writing */
	int needwrite;		/* non-zero if write is needed */
	blockoff_t begoff, endoff;
	hed_off_t shift;
	void *partbuf;		/* allocated 3*FILE_BLOCK_SIZE */
	void *partial;		/* pointer into partbuf */
};

/* Get the logical<->physical shift value after the specified block.
 * It is essential to get the value _AFTER_ the block, because the
 * shift value is used to decide how the current block will be handled.
 */
static hed_off_t
get_shift(const blockoff_t *blockoff)
{
	struct file_block *block = blockoff->block;
	size_t curshift = hed_block_is_inserted(block) ? block->t.size : 0;
	return curshift +
		blockoff->pos - blockoff->off - block->phys_pos;
}

static void
switch_partial(struct commit_control *cc)
{
	cc->partial += FILE_BLOCK_SIZE;
	if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
		cc->partial = cc->partbuf;
}
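
/*
 * Worked example (not part of the original source): the shift is how far
 * logical positions run ahead of (positive) or behind (negative) physical
 * positions after a block. A 0x10-byte insertion at the start of the file
 * makes get_shift() return 0x10 for that block: everything after it must
 * move to higher offsets on disk, so the data has to be written from the
 * end backwards to avoid overwriting bytes not yet read. After an erase
 * the shift goes negative and writing forwards is safe. commit_forwards()
 * and commit_backwards() below implement the two directions, rotating
 * the three partial buffers inside @partbuf via switch_partial().
 */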

/* Write @len bytes from the partial buffer at @cc->begoff. */
static int
commit_block(struct commit_control *cc, size_t len)
{
	ssize_t written;

	assert(len > 0);
	BDEBUG(" -> write %lx bytes at %llx\n",
	       (unsigned long)len, cc->begoff.pos - len);
	written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
	if (written < len)
		/* TODO: keep data in a new list of dirty blocks */
		return -1;
	return 0;
}

static int
commit_partial(struct commit_control *cc)
{
	size_t partoff, remain, left;
	size_t writelen;

	partoff = FILE_BLOCK_OFF(cc->begoff.pos);
	remain = FILE_BLOCK_SIZE - partoff;
	if (remain > cc->endoff.pos - cc->begoff.pos)
		remain = cc->endoff.pos - cc->begoff.pos;
	if ((writelen = partoff + remain) == 0)
		return 0;

	BDEBUG("Fill partial %llx-%llx\n",
	       cc->begoff.pos, cc->begoff.pos + remain);

	left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
	if (left) {
		hed_move_relative(&cc->begoff, left);
		return -1;
	}

	if (FILE_BLOCK_OFF(cc->begoff.pos) &&
	    !hed_block_is_eof(cc->begoff.block))
		return 0;

	return commit_block(cc, writelen);
}

/* Commit forwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_forwards(struct commit_control *cc)
{
	hed_uoff_t endpos = cc->endoff.pos;
	int ret = 0;

	BDEBUG("Writing forwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	while (cc->begoff.pos < endpos)
		ret |= commit_partial(cc);

	return ret;
}

/* Commit backwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_backwards(struct commit_control *cc)
{
	void *retpartial = cc->partial;
	hed_uoff_t begpos = cc->begoff.pos;
	hed_uoff_t blkpos;	/* start of current partial block */
	int ret = 0;

	BDEBUG("Writing backwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
	if (blkpos <= begpos)
		goto final;

	/* Handle the trailing partial block */
	hed_update_cursor(cc->file, blkpos, &cc->begoff);
	switch_partial(cc);
	ret |= commit_partial(cc);
	retpartial = cc->partial;

	/* Handle the middle part */
	switch_partial(cc);
	while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
		hed_update_cursor(cc->file, blkpos, &cc->begoff);
		ret |= commit_partial(cc);
	}
	switch_partial(cc);	/* wrap around */

final:
	/* Handle the first block (partial or not) */
	hed_update_cursor(cc->file, begpos, &cc->begoff);
	ret |= commit_partial(cc);

	cc->partial = retpartial;
	return ret;
}

/* Handle the partial block before a skipped one. */
static int
begin_skip(struct commit_control *cc)
{
	size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
	size_t remain;
	int ret = 0;

	/* Check if at least one complete physical block can be skipped */
	if (cc->endoff.block->t.size < minsize)
		return 0;

	/* Write out the partially dirty block */
	remain = FILE_BLOCK_OFF(minsize);
	hed_move_relative(&cc->endoff, remain);
	if (cc->shift <= 0)
		ret |= commit_forwards(cc);
	else
		ret |= commit_backwards(cc);
	hed_move_relative(&cc->endoff, -(hed_off_t)remain);
	hed_dup2_cursor(&cc->endoff, &cc->begoff);

	cc->needwrite = 0;
	return ret;
}

/* Handle the last partially skipped physical block. */
static int
end_skip(struct commit_control *cc)
{
	size_t partlen;
	int ret = 0;

	/* Find the beginning of the physical block */
	hed_dup2_cursor(&cc->endoff, &cc->begoff);
	partlen = FILE_BLOCK_OFF(cc->begoff.pos);
	hed_move_relative(&cc->begoff, -(hed_off_t)partlen);

	/* Read the partial data before this block */
	if (hed_file_cpin(cc->file, cc->partial, partlen, &cc->begoff))
		ret = -1;

	cc->needwrite = 1;
	return ret;
}

static void
undirty_blocks(struct hed_file *file)
{
	struct file_block *block, *next;
	hed_uoff_t pos = 0;

	BDEBUG("Undirtying blocks:\n");
	dump_blocks(file);

	next = first_block(file);
	while (!hed_block_is_eof(next)) {
		block = next;
		next = next_block(block);

		if (kill_block_if_empty(file, block))
			continue;

		if (!hed_block_is_virtual(block)) {
			cache_put(file->cache, block->dataobj);
			block->dataobj = NULL;
			list_del_init(&block->lru);
			block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
		}

		block->phys_pos = pos;
		pos += block->t.size;
	}

	block = first_block(file);
	while (!hed_block_is_eof(block)) {
		next = next_block(block);
		file_kill_block(file, block);
		block = next;
	}

	BDEBUG("After undirtying\n");
	dump_blocks(file);
}

static int
commit_init(struct commit_control *cc, struct hed_file *file)
{
	cc->file = file;

	cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
	if (!cc->partbuf)
		goto err;

	cc->wfd = open(file->name,
		       O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
	if (cc->wfd < 0)
		goto err_free;

	if (file->fd < 0 &&
	    (file->fd = open(file->name, O_RDONLY)) < 0)
		goto err_close;

	return 0;

err_close:
	close(cc->wfd);
err_free:
	free(cc->partbuf);
err:
	return -1;
}

int
hed_file_commit(struct hed_file *file)
{
	struct commit_control cc;
	int ret = 0;

	if (commit_init(&cc, file))
		return -1;

	dump_blocks(file);

	cc.partial = cc.partbuf;
	get_cursor(file, 0, &cc.begoff);
	hed_dup_cursor(&cc.begoff, &cc.endoff);
	cc.shift = -cc.begoff.block->phys_pos;
	cc.needwrite = 0;
	while (!hed_block_is_eof(cc.endoff.block)) {
		hed_off_t newshift = cc.endoff.pos < file->phys_size
			? get_shift(&cc.endoff)
			: 0;

		if (cc.shift <= 0 && newshift > 0) {
			ret |= commit_forwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		} else if (cc.shift > 0 && newshift <= 0) {
			ret |= commit_backwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		}
		cc.shift = newshift;

		if (!newshift && !hed_block_is_dirty(cc.endoff.block)) {
			if (cc.needwrite)
				ret |= begin_skip(&cc);
		} else if (!cc.needwrite)
			ret |= end_skip(&cc);

		if (!move_to_next(&cc.endoff))
			break;
	}
	assert(cc.endoff.pos == file_size(file));

	if (cc.begoff.pos < file_size(file)) {
		if (cc.shift <= 0)
			ret |= commit_forwards(&cc);
		else
			ret |= commit_backwards(&cc);
	}

	put_cursor(&cc.begoff);
	put_cursor(&cc.endoff);

	ftruncate(cc.wfd, file_size(file));
	file->phys_size = file_size(file);

	ret |= close(cc.wfd);
	free(cc.partbuf);

	undirty_blocks(file);

	if (!ret)
		clear_modified(file);

	return ret;
}
2141 #ifdef HED_CONFIG_SWAP
2143 hed_file_write_swap(struct hed_file *file)
2145 return swp_write(file_swp(file));
static int
do_read_swap(struct hed_file *file, struct swp_file *swp, blockoff_t *pos)
{
	struct hed_file *swpfile = swp_private(swp);
	struct file_block *cur, block;
	hed_uoff_t phys_pos;

	if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
	    file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
		fprintf(stderr, "stat info mismatch (you modified the file since hed ran on it; refusing to touch it)\n");
		return -1;
	}

	BDEBUG("Swap header match\n");

	phys_pos = 0;
	cur = first_block(swpfile);
	do {
		struct hed_block_data dataobj;
		size_t (*mergefn)(struct hed_file*, blockoff_t*,
				  unsigned char*, size_t);
		void *data;
		size_t res;

		if (swp_cpin(swp, &block, cur, sizeof(struct file_block))) {
			perror("Cannot read block descriptor");
			return -1;
		}
		BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
		       cur, block.flags, (long long)block.phys_pos,
		       (long long)hed_block_size(&block));

		if (block.phys_pos != phys_pos) {
			if (hed_file_erase_block(file, pos,
						 block.phys_pos - phys_pos)) {
				perror("Cannot erase");
				return -1;
			}
			phys_pos = block.phys_pos;
		}

		if (!hed_block_is_inserted(&block))
			phys_pos += hed_block_size(&block);

		if (!hed_block_is_dirty(&block)) {
			hed_move_relative(pos, hed_block_size(&block));
			continue;
		}

		if (swp_cpin(swp, &dataobj, block.dataobj,
			     sizeof(struct hed_block_data))) {
			perror("Cannot read data descriptor");
			return -1;
		}
		BDEBUG("DATA %p: size 0x%lx\n",
		       block.dataobj, (long)dataobj.size);

		if (! (data = malloc(hed_block_size(&block))) ) {
			perror("Cannot allocate data");
			return -1;
		}

		if (swp_cpin(swp, data, dataobj.data + block.dataoff,
			     hed_block_size(&block))) {
			perror("Cannot read data");
			free(data);
			return -1;
		}

		mergefn = hed_block_is_inserted(&block)
			? hed_file_insert_once
			: hed_file_set_block;
		res = mergefn(file, pos, data, hed_block_size(&block));
		free(data);
		if (res) {
			perror("Cannot merge data");
			return -1;
		}
	} while (cur = next_block(&block), !hed_block_is_eof(&block));

	dump_blocks(file);
	return 0;
}
int
hed_file_read_swap(struct hed_file *file)
{
	struct swp_file *swp;
	blockoff_t pos;
	int ret;

	if (! (swp = swp_init_read(file->swpname)) )
		return -1;

	get_cursor(file, 0, &pos);
	ret = do_read_swap(file, swp, &pos);
	put_cursor(&pos);

	swp_done(swp);
	return ret;
}

#endif	/* HED_CONFIG_SWAP */
struct ffb_hookdata {
	struct hed_file *file;
	blockoff_t *pos;
	hed_expr_reg_cb base_ecb;
	void *base_ecb_data;
};
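
/* Register callback used while evaluating search expressions: the '.'
 * register denotes bytes relative to the current search position and
 * is read straight from the file; every other register is delegated
 * to the caller-supplied callback. */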
static long
eval_reg_cb(void *hookdata, char reg, hed_off_t ofs,
	    unsigned char *scramble, size_t len)
{
	struct ffb_hookdata *data = hookdata;

	if (reg == '.') {
		blockoff_t pos;
		long ret = HED_AEF_DYNAMIC;

		hed_dup_cursor(data->pos, &pos);
		hed_move_relative(&pos, ofs);
		if (copy_in(data->file, scramble, len, &pos))
			ret = HED_AEF_ERROR;
		put_cursor(&pos);
		return ret;
	}

	return data->base_ecb(data->base_ecb_data, reg, ofs, scramble, len);
}
static void
reverse(unsigned char *p, size_t len)
{
	unsigned char *q = p + len;

	while (p < q) {
		unsigned char x = *p;
		*p++ = *--q;
		*q = x;
	}
}
static void
compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
{
	ssize_t i = 1;

	while (i < len)
		badchar[*s++] = i++;
}
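
/*
 * Illustrative example: for the needle "abcab" (len 5), the loop
 * above records, for each byte value among the first len-1 needle
 * bytes, its rightmost 1-based position:
 *
 *	badchar['a'] = 4, badchar['b'] = 2, badchar['c'] = 3
 *
 * and every other byte value stays 0 (the caller hands in a zeroed
 * table). On a mismatch at needle index i, bm_find() below shifts by
 * i + 1 - badchar[*p].
 */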
static void
compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
{
	ssize_t f, g, i;

	sfx[len - 1] = len;
	f = 0;		/* never read before it is set below; this just
			 * silences a may-be-used-uninitialized warning */
	g = len - 1;
	for (i = len - 2; i >= 0; --i) {
		if (i > g && sfx[i + len - 1 - f] < i - g)
			sfx[i] = sfx[i + len - 1 - f];
		else {
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && s[g] == s[g + len - 1 - f])
				--g;
			sfx[i] = f - g;
		}
	}
}
static void
compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
{
	ssize_t i, j, *sfx = goodsfx + len;

	compute_sfx(sfx, s, len);

	for (i = 0; i < len; ++i)
		goodsfx[i] = len;
	j = 0;
	for (i = len - 1; i >= 0; --i)
		if (sfx[i] == i + 1)
			for (; j < len - 1 - i; ++j)
				if (goodsfx[j] == len)
					goodsfx[j] = len - 1 - i;
	for (i = 0; i <= len - 2; ++i)
		goodsfx[len - 1 - sfx[i]] = len - 1 - i;
}
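
/* Note that @goodsfx must provide room for 2*len entries: the first
 * len hold the good-suffix shift table itself, the next len serve as
 * scratch space for compute_sfx(). find_bytestr() below sizes its
 * allocation accordingly (256 + 2*len ssize_t's). */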
/* Search forwards through a buffer for a constant byte string using
 * the Boyer-Moore algorithm. @maxidx is the index of the needle's
 * last byte, i.e. its length minus one. */
static inline unsigned char*
bm_find(unsigned char *buf, size_t buflen, unsigned char *needle,
	size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	while (buflen > maxidx) {
		unsigned char *p;
		size_t i;
		ssize_t shift;

		/* Compare the needle right to left */
		for (p = buf + maxidx, i = maxidx; p >= buf; --p, --i)
			if (needle[i] != *p)
				break;
		if (p < buf)
			return buf;

		/* Shift by the larger of the bad-character and
		 * good-suffix distances */
		shift = i + 1 - badchar[*p];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf += shift;
		buflen -= shift;
	}
	return NULL;
}
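
/*
 * Illustrative sketch of the helpers above working on a flat buffer
 * (the real callers go through find_bytestr_buf() below, which also
 * handles the single-byte and backward cases); buf and buflen stand
 * in for an existing haystack:
 *
 *	ssize_t badchar[256] = { 0 };
 *	ssize_t goodsfx[2 * 5];		(2*len, see compute_goodsfx())
 *	unsigned char needle[5] = "abcab";
 *	unsigned char *hit;
 *
 *	compute_badchar(badchar, needle, 5);
 *	compute_goodsfx(goodsfx, needle, 5);
 *	hit = bm_find(buf, buflen, needle, 5 - 1, badchar, goodsfx);
 *
 * A non-NULL hit points at the first occurrence; NULL means no match.
 */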
/* Like bm_find(), but search backwards; @buf points at the last byte
 * of the area to search, and @needle has been reversed by the caller. */
static inline unsigned char*
bm_find_rev(unsigned char *buf, size_t buflen, unsigned char *needle,
	    size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	buf -= maxidx;
	while (buflen > maxidx) {
		unsigned char *p;
		size_t i;
		ssize_t shift;

		for (p = buf, i = maxidx; p <= buf + maxidx; ++p, --i)
			if (needle[i] != *p)
				break;
		if (p > buf + maxidx)
			return buf;

		shift = i + 1 - badchar[*p];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf -= shift;
		buflen -= shift;
	}
	return NULL;
}
/* Search for a constant byte string in @buf.
 * If @buflen is negative, search backwards from @buf, otherwise
 * search forwards. */
static inline unsigned char*
find_bytestr_buf(unsigned char *buf, ssize_t buflen,
		 unsigned char *needle, ssize_t maxidx,
		 ssize_t *badchar, ssize_t *goodsfx)
{
	if (buflen < 0) {
		buflen = -buflen;
		if (!maxidx)
			return memrchr(buf - buflen + 1, *needle, buflen);
		return bm_find_rev(buf, buflen, needle, maxidx,
				   badchar, goodsfx);
	} else {
		if (!maxidx)
			return memchr(buf, *needle, buflen);
		return bm_find(buf, buflen, needle, maxidx,
			       badchar, goodsfx);
	}
}
/* Search the file for a constant byte string, using Boyer-Moore for
 * needles longer than a single byte and memchr()/memrchr() otherwise.
 * Returns 0 with @from left at the match, or HED_FINDOFF_NO_MATCH or
 * HED_FINDOFF_ERROR. */
static int
find_bytestr(struct hed_file *file, blockoff_t *from, int dir,
	     unsigned char *needle, ssize_t len)
{
	void *dynalloc;
	ssize_t *badchar, *goodsfx;
	unsigned char *readbuf;
	ssize_t slen;
	int ret;

	if (len > 1) {
		/* One allocation holds the 256-entry bad-character
		 * table, 2*len good-suffix entries (table plus
		 * scratch) and a 2*(len-1) byte bounce buffer for
		 * candidate matches that straddle block boundaries. */
		dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
				  + 2*(len-1), 1);
		if (!dynalloc)
			return HED_FINDOFF_ERROR;
		badchar = dynalloc;
		goodsfx = badchar + 256;
		readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);

		if (dir < 0)
			reverse(needle, len);
		compute_badchar(badchar, needle, len);
		compute_goodsfx(goodsfx, needle, len);
	} else {
		dynalloc = NULL;
		badchar = goodsfx = NULL;
		readbuf = NULL;
	}

	--len;	/* simplify offset computing */
	if (dir < 0) {
		from->off += len;
		from->pos += len;
		slen = -len;
	} else
		slen = len;

	ret = HED_FINDOFF_NO_MATCH;
	while (from->pos >= 0) {
		ssize_t remain;

		fixup_cursor(from);
		if (hed_block_is_eof(from->block))
			break;

		remain = hed_prepare_read(file, from, SSIZE_MAX);
		if (!remain) {
			ret = HED_FINDOFF_ERROR;
			break;
		}
		if (dir < 0)
			remain = -(from->off + 1);

		if (!hed_block_is_bad(from->block)) {
			unsigned char *p, *q;

			if ((dir >= 0 && remain > slen) ||
			    (dir < 0 && remain < slen)) {
				assert(!hed_block_is_virtual(from->block));
				assert(from->block->dataobj);
				p = from->block->dataobj->data + from->off;
				from->off += remain;
				from->pos += remain;
			} else if (dir >= 0) {
				remain += slen;
				if (copy_in(file, readbuf, remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				p = readbuf;
			} else {
				remain += slen;
				from->off += remain + 1;
				from->pos += remain + 1;
				if (from->pos < 0)
					break;
				fixup_cursor_slow(from);
				if (copy_in(file, readbuf, -remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				from->off -= -remain + 1;
				from->pos -= -remain + 1;
				p = readbuf + (-remain - 1);
			}

			q = find_bytestr_buf(p, remain, needle, len,
					     badchar, goodsfx);
			if (q) {
				move_rel_fast(from, q - p - remain);
				ret = 0;
				break;
			}
			from->off -= slen;
			from->pos -= slen;
		} else {
			/* bad blocks cannot match anything */
			from->off += remain;
			from->pos += remain;
		}
	}

	free(dynalloc);
	return ret;
}
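
/* Search for a general expression, which may be dynamic: such an
 * expression has to be re-evaluated and re-compared at every single
 * candidate offset. As soon as the evaluated flags show that the
 * expression is in fact constant, the scan is handed over to the
 * Boyer-Moore machinery in find_bytestr() above. */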
static int
find_expr(struct hed_file *file, blockoff_t *from, int dir,
	  struct hed_expr *expr, struct ffb_hookdata *data)
{
	int len = hed_expr_len(expr);
	unsigned char *buf;

	assert(len > 0);

	if (len > file_size(file))
		return HED_FINDOFF_NO_MATCH;
	if ((hed_off_t)file_size(file) - from->pos - len < 0)
		hed_move_relative(from,
			(hed_off_t)file_size(file) - from->pos - len);

	for (;;) {
		blockoff_t match;
		size_t remain;
		unsigned char *p;
		int pos;

		buf = hed_expr_eval(expr, eval_reg_cb, NULL, data);
		if (!buf)
			return HED_FINDOFF_ERROR;

		hed_dup_cursor(from, &match);
		remain = 0;
		for (pos = 0; pos < len; pos++) {
			if (!remain) {
				remain = hed_prepare_read(file, &match,
							  SIZE_MAX);
				if (!remain ||
				    hed_block_is_bad(match.block))
					break;
				p = hed_cursor_data(&match);
				cursor_next_block(&match);
			}
			if (*p++ != buf[pos])
				break;
			remain--;
		}
		put_cursor(&match);

		if (pos == len)
			return 0;
		if (!remain)
			return HED_FINDOFF_ERROR;

		from->pos += dir;
		from->off += dir;
		if (from->pos < 0 || from->pos > file_size(file) - len)
			break;
		fixup_cursor(from);

		if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
			return find_bytestr(file, from, dir, buf, len);
	}

	return HED_FINDOFF_NO_MATCH;
}
int
hed_file_find_expr(struct hed_file *file, blockoff_t *pos, int dir,
		   struct hed_expr *expr,
		   hed_expr_reg_cb expr_cb, void *expr_cb_data)
{
	struct ffb_hookdata data;
	int res;

	assert(dir == 1 || dir == -1);

	data.file = file;
	data.pos = pos;
	data.base_ecb = expr_cb;
	data.base_ecb_data = expr_cb_data;

	hed_file_set_readahead(file,
		dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
	res = find_expr(file, pos, dir, expr, &data);
	hed_file_set_readahead(file, HED_RA_NONE);

	return res;
}
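
/*
 * Illustrative usage sketch (not part of the library; my_reg_cb and
 * ui stand in for a frontend's own register callback and its data):
 *
 *	res = hed_file_find_expr(file, &cur, 1, expr, my_reg_cb, ui);
 *	if (!res)
 *		... cur now sits at the start of the match ...
 *	else if (res == HED_FINDOFF_NO_MATCH)
 *		... report "not found" ...
 *	else
 *		... report an I/O or evaluation error ...
 */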