libhed/file.c
1 /* $Id$ */
3 /*
4 * hed - Hexadecimal editor
5 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of version 2 of the GNU General Public License as
9 * published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * There hammer on the anvil smote,
23 * There chisel clove, and graver wrote;
24 * There forged was blade, and bound was hilt;
25 * The delver mined, the mason built.
26 * There beryl, pearl, and opal pale,
27 * And metal wrought like fishes' mail,
28 * Buckler and corslet, axe and sword,
29 * And shining spears were laid in hoard.
32 /* Feature macros needed for:
33 * - memrchr
34 * - pread, pwrite
35 * - stpcpy
37 #define _GNU_SOURCE
39 #include <config.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sys/ioctl.h>
48 #include <unistd.h>
49 #include <linux/fs.h> /* BLKGETSIZE and BLKGETSIZE64 */
51 #include "file.h"
52 #include "access.h"
53 #include "cache.h"
54 #include "swap.h"
55 #include "tree.h"
56 #include "expr.h"
58 /* memrchr() might not be available */
59 #ifndef HAVE_MEMRCHR
60 # include "memrchr.c"
61 #endif /* HAVE_MEMRCHR */
64 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
65 * there is nothing left in the upper halls. And since the dwarves fled, no one
66 * dares to seek the shafts and treasuries down in the deep places: they are
67 * drowned in water--or in a shadow of fear.'
70 /* TODO: Currently the file block allocation is not very sophisticated and
71 * in unfavourable cases it could produce rather horrible results. */
73 #undef BLOCKS_DEBUG
74 #ifdef BLOCKS_DEBUG
75 #define BDEBUG(x...) fprintf(stderr, x)
76 #else
77 #define BDEBUG(x...)
78 #endif
80 /* Number of blocks in cache */
81 #define CACHE_LENGTH 64
83 /* Blocks for readahead */
84 #define FILE_READAHEAD (CACHE_LENGTH/2)
86 #define file_block hed_block
87 #define blockoff_t hed_cursor_t
89 #define block_is_dirty hed_block_is_dirty
90 #define block_set_dirty hed_block_set_dirty
91 #define block_clear_dirty hed_block_clear_dirty
92 #define block_is_inserted hed_block_is_inserted
93 #define block_set_inserted hed_block_set_inserted
94 #define block_clear_inserted hed_block_clear_inserted
95 #define block_is_excache hed_block_is_excache
96 #define block_set_excache hed_block_set_excache
97 #define block_clear_excache hed_block_clear_excache
98 #define block_is_virtual hed_block_is_virtual
99 #define block_set_virtual hed_block_set_virtual
100 #define block_clear_virtual hed_block_clear_virtual
101 #define block_is_eof hed_block_is_eof
102 #define block_set_eof hed_block_set_eof
103 #define block_clear_eof hed_block_clear_eof
104 #define block_is_inner_virtual hed_block_is_inner_virtual
106 #define block_data hed_block_data
108 #define null_block(tree) (tree_entry(null_node(tree),struct file_block,t))
110 #define first_block(tree) (tree_entry(first_in_tree(tree),struct file_block,t))
111 #define prev_block(b) (tree_entry(prev_in_tree(&(b)->t),struct file_block,t))
112 #define next_block(b) (tree_entry(next_in_tree(&(b)->t),struct file_block,t))
113 #define last_block(tree) (tree_entry(last_in_tree(tree),struct file_block,t))
115 /* false if this is not a block - out of list bounds */
116 #define is_a_block(tree,b) (&(b)->t != null_node(tree))
118 #define block_offset(tree,b) tree_block_offset((tree), &(b)->t)
120 #define recalc_block_recursive(tree,b) recalc_node_recursive((tree),&(b)->t)
122 #define chain_block(tree,b) append_to_tree((tree), &(b)->t)
123 #define recalc_chain_block(tree,b) do { \
124 chain_block((tree), (b)); \
125 recalc_block_recursive((tree), (b)); \
126 } while (0)
128 #define chain_block_after(tree,p,b) insert_into_tree((tree), &(b)->t, &(p)->t)
130 #define recalc_chain_block_after(tree,p,b) do { \
131 chain_block_after((tree), (p), (b)); \
132 recalc_block_recursive((tree), (b)); \
133 } while (0)
135 #define unchain_block(tree,b) del_from_tree((tree), &(b)->t)
136 #define recalc_unchain_block(tree,b) recalc_del_from_tree((tree), &(b)->t)
138 #define init_block_list(tree) init_tree(tree)
139 #define init_block_link(b) init_node(&(b)->t)
141 #define foreach_block(b,tree) foreach_in_tree ((b),(tree),struct file_block,t)
142 #define foreachsafe_block(b,n,tree) foreachsafe_in_tree ((b),(n),(tree),struct file_block,t)
144 #define find_block(tree,o) tree_entry(find_in_tree((tree),(o)),struct file_block,t)
146 #define file_size hed_file_size
147 #define file_blocks hed_file_blocks
149 #ifdef HED_CONFIG_SWAP
151 /* Return the swp file object */
152 static inline struct swp_file *
153 file_swp(struct hed_file *file)
155 return file->swp;
158 #else
160 /* Provide a stub for the non-swap case */
161 static inline void *
162 file_swp(struct hed_file *file)
164 return NULL;
167 #endif
169 #ifdef HED_CONFIG_READAHEAD
171 #define file_ra_none(f) ((f)->readahead == HED_RA_NONE)
172 #define file_ra_forward(f) ((f)->readahead == HED_RA_FORWARD)
173 #define file_ra_backward(f) ((f)->readahead == HED_RA_BACKWARD)
175 #else
177 #define file_ra_none(f) (1)
178 #define file_ra_forward(f) (0)
179 #define file_ra_backward(f) (0)
181 #endif /* HED_CONFIG_READAHEAD */
183 bool
184 hed_block_is_after_erase(const struct hed_tree *tree, struct hed_block *block)
186 struct hed_block *prev;
188 prev = prev_block(block);
189 if (is_a_block(tree, prev)) {
190 hed_uoff_t prevend = prev->phys_pos;
191 if (!block_is_inserted(prev))
192 prevend += prev->t.size;
193 return block->phys_pos > prevend;
194 } else
195 return block->phys_pos;
198 bool
199 hed_block_is_after_insert(const struct hed_tree *tree, struct hed_block *block)
201 struct hed_block *prev = block;
202 while (is_a_block(tree, prev = prev_block(prev)))
203 if (hed_block_size(prev)) {
204 if (block_is_inserted(prev))
205 return true;
206 break;
208 return false;
211 #ifndef BLOCKS_DEBUG
212 # define dump_blocks(file) {}
213 #else
215 static hed_uoff_t
216 block_phys_size(struct hed_file *file, struct file_block *block)
218 struct file_block *next;
220 next = next_block(block);
221 if (!is_a_block(file_blocks(file), next)) {
222 assert(block_is_virtual(block));
223 return 0;
226 return next->phys_pos - block->phys_pos;
229 static void
230 dump_block(int level, struct hed_file *file, struct hed_tree_head *node,
231 hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
233 struct file_block *block = tree_entry(node, struct file_block, t);
234 bool virtual = block_is_virtual(block);
235 blockoff_t *cur;
236 char t[21] = " ";
238 if (is_a_node(file_blocks(file), node->left))
239 dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
240 if (level < 20) t[level] = '>'; else t[19] = '.';
241 fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
242 t, *cur_offset, *cur_poffset,
243 virtual ? 'v' : ' ', block_is_inserted(block) ? 'i' : ' ',
244 block_is_dirty(block) ? '*' : ' ',
245 node->size, block_phys_size(file, block),
246 !virtual && block->t.size > 0 ? block_data(block)[0] : 0,
247 !virtual && block->t.size > 1 ? block_data(block)[1] : 0,
248 !virtual && block->t.size > 2 ? block_data(block)[2] : 0,
249 !virtual && block->t.size > 3 ? block_data(block)[3] : 0,
250 node, is_a_node(file_blocks(file), node->up) ? node->up : NULL,
251 node->cover_size
253 list_for_each_entry (cur, &block->refs, list) {
254 fprintf(stderr, " <%p>: %llx->%p:%lx\n",
255 cur, cur->pos, cur->block, cur->off);
257 assert(*cur_poffset == block->phys_pos);
258 *cur_offset += node->size;
259 *cur_poffset += block_phys_size(file, block);
260 if (is_a_node(file_blocks(file), node->right))
261 dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
262 assert(node->cover_size == (is_a_node(file_blocks(file), node->left) ? node->left->cover_size : 0)
263 + (is_a_node(file_blocks(file), node->right) ? node->right->cover_size : 0)
264 + node->size);
267 /* Walk the tree manually here, because foreach_block() does not provide
268 * the tree structure.
269 * TODO: Change this if you plan to debug any other block containers.
271 static void
272 dump_blocks(struct hed_file *file)
274 struct file_block *first = first_block(file_blocks(file));
275 hed_uoff_t cur_offset, cur_poffset;
277 fprintf(stderr, "-- blocks dump --\n");
278 if (is_a_block(file_blocks(file), first)) {
279 cur_offset = 0;
280 cur_poffset = first->phys_pos;
281 dump_block(0, file, file_blocks(file)->root,
282 &cur_offset, &cur_poffset);
284 fprintf(stderr, "-- blocks dump end --\n");
286 #endif
288 static void
289 get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
291 struct file_block *block;
293 block = find_block(file_blocks(file), offset);
294 assert(is_a_block(file_blocks(file), block));
295 curs->pos = offset;
296 curs->block = block;
297 curs->off = offset - block_offset(file_blocks(file), block);
298 list_add(&curs->list, &block->refs);
300 BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
301 offset, offset - curs->off, curs->off, block->t.size);
304 void
305 hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
307 if (hed_is_a_cursor(curs))
308 hed_put_cursor(curs);
309 get_cursor(file, offset, curs);
312 void
313 hed_put_cursor(hed_cursor_t *curs)
315 list_del(&curs->list);
318 void
319 hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
321 dst->pos = src->pos;
322 dst->block = src->block;
323 dst->off = src->off;
324 list_add_tail(&dst->list, &src->block->refs);
327 void
328 hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
330 if (hed_is_a_cursor(dst))
331 hed_put_cursor(dst);
332 hed_dup_cursor(src, dst);
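/* Note on cursor lifetime (added remark, not in the original source):
 * every live cursor is linked into the @refs list of the block it points
 * into, so that block splits, merges and kills can find and update it.
 * A cursor obtained with hed_get_cursor() or hed_dup_cursor() must
 * therefore always be released with hed_put_cursor(), e.g.:
 *
 *	hed_cursor_t curs;
 *	hed_get_cursor(file, offset, &curs);
 *	...use curs...
 *	hed_put_cursor(&curs);
 */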
335 /* Move blockoffs from @old to @new, adding @off to their block
336 * offsets to keep them at the same position. */
337 static void
338 update_blockoffs(const struct file_block *old, struct file_block *new,
339 hed_off_t off)
341 blockoff_t *blockoff;
343 BDEBUG("Updating blockoffs from <%p> to <%p>%c%llx\n",
344 old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);
346 list_for_each_entry(blockoff, &old->refs, list) {
347 blockoff->block = new;
348 blockoff->off += off;
352 /* Move blockoffs in the range <@start;@end> from @old to @new,
353 * adding @off to their block offset, plus moving the reference list. */
354 static void
355 move_blockoffs(const struct file_block *old, struct file_block *new,
356 hed_uoff_t start, hed_uoff_t end, hed_off_t off)
358 blockoff_t *blockoff, *nextoff;
360 BDEBUG("Moving blockoffs from <%p>:%llx:%llx to <%p>%c%llx\n",
361 old, start, end, new,
362 off >= 0 ? '+' : '-', off >= 0 ? off : -off);
364 list_for_each_entry_safe(blockoff, nextoff, &old->refs, list)
365 if (blockoff->off >= start && blockoff->off <= end) {
366 blockoff->block = new;
367 blockoff->off += off;
368 list_move(&blockoff->list, &new->refs);
372 /* Destroy blockoffs referencing @block from @start to @end */
373 static void
374 nuke_blockoffs(const struct file_block *block,
375 hed_uoff_t start, hed_uoff_t end)
377 blockoff_t *blockoff, *nextoff;
379 BDEBUG("Nuking blockoffs from <%p>:%llx:%llx\n",
380 block, start, end);
381 list_for_each_entry_safe(blockoff, nextoff, &block->refs, list)
382 if (blockoff->off >= start && blockoff->off <= end) {
383 blockoff->block = NULL;
384 list_del_init(&blockoff->list);
388 /* Update the positions of blockoffs at and after @start for all
389 * blocks starting at @block */
390 static void
391 slide_blockoffs(struct hed_file *file, const struct file_block *block,
392 hed_uoff_t start, hed_off_t off)
394 blockoff_t *blockoff;
396 BDEBUG("Sliding blockoffs >= %llx by %c%llx, starting at <%p>\n",
397 start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
398 while (is_a_block(file_blocks(file), block)) {
399 list_for_each_entry(blockoff, &block->refs, list)
400 if (blockoff->pos >= start)
401 blockoff->pos += off;
402 block = next_block(block);
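/* (Descriptive comment added for the otherwise undocumented helper below.)
 * new_block() allocates and initializes a block with the given @flags.
 * Blocks created with HED_BLOCK_EXCACHE are kept off the file's LRU list;
 * all others are appended to it. Returns NULL if swp_zalloc() fails. */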
406 static struct hed_block *
407 new_block(struct hed_file *file, long flags)
409 struct file_block *new;
411 if (! (new = swp_zalloc(file_swp(file), sizeof(struct file_block))) )
412 return NULL;
414 new->flags = flags;
415 init_block_link(new);
416 INIT_LIST_HEAD(&new->refs);
417 if (flags & HED_BLOCK_EXCACHE)
418 INIT_LIST_HEAD(&new->lru);
419 else
420 list_add_tail(&new->lru, &file->lru);
422 return new;
425 static struct hed_block *
426 new_virt_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
427 long extraflags)
429 struct hed_block *new =
430 new_block(file, (HED_BLOCK_EXCACHE |
431 HED_BLOCK_VIRTUAL |
432 extraflags));
433 if (!new)
434 return NULL;
436 new->t.size = size;
437 new->phys_pos = pos;
438 BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
439 return new;
442 static struct hed_block *
443 new_data_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
444 struct hed_block_data *dataobj)
446 struct hed_block *new =
447 new_block(file, 0);
448 if (!new)
449 return NULL;
451 cache_get(dataobj);
452 new->dataobj = dataobj;
453 new->t.size = size;
454 new->phys_pos = pos;
455 new->dataoff = FILE_BLOCK_OFF(pos);
456 BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
457 return new;
460 static void
461 file_free_block(struct hed_file *file, struct file_block *block)
463 if (block->dataobj)
464 cache_put(file->cache, block->dataobj);
465 list_del(&block->lru);
467 swp_free(file_swp(file), block);
470 static bool
471 kill_block_if_empty(struct hed_file *file, struct file_block *block)
473 if (!block_is_eof(block) && block->t.size == 0) {
474 /* No recalculation needed, zero size. */
475 unchain_block(file_blocks(file), block);
476 assert(list_empty(&block->refs));
477 file_free_block(file, block);
478 return true;
480 return false;
483 /* This may kill the previous block as well, if it can be merged
484 * with the next one. It will never kill anything which _follows_. */
485 static void
486 file_kill_block(struct hed_file *file, struct file_block *block)
488 hed_uoff_t phys_pos = block->phys_pos;
489 struct file_block *prev = prev_block(block);
490 struct file_block *next = next_block(block);
491 struct file_block *merger;
492 bool killprev = false;
494 /* We should never kill a dirty block! */
495 assert(!block_is_dirty(block));
496 /* We shouldn't get here with an empty block (that might
497 * need special consideration with virtualization). */
498 assert(block->t.size > 0);
500 if (is_a_block(file_blocks(file), next) &&
501 block_is_inner_virtual(next) &&
502 phys_pos + block->t.size == next->phys_pos) {
503 if (is_a_block(file_blocks(file), prev) &&
504 block_is_inner_virtual(prev) &&
505 prev->phys_pos + prev->t.size == phys_pos)
506 killprev = true;
507 merger = next;
508 merger->phys_pos -= block->t.size;
509 update_blockoffs(merger, merger, block->t.size);
510 update_blockoffs(block, merger, 0);
511 } else if (is_a_block(file_blocks(file), prev) &&
512 block_is_inner_virtual(prev) &&
513 prev->phys_pos + prev->t.size == phys_pos) {
514 merger = prev;
515 update_blockoffs(block, merger, merger->t.size);
516 } else if (!block_is_virtual(block)) {
517 /* Convert physical to virtual */
518 assert(block->dataobj);
519 cache_put(file->cache, block->dataobj);
520 block->dataobj = NULL;
522 list_del_init(&block->lru); /* unlink the block from LRU */
523 block_set_excache(block); /* say it's unlinked */
524 block_set_virtual(block);
525 return;
526 } else
527 /* Already virtual and cannot merge */
528 return;
530 list_splice(&block->refs, &merger->refs);
532 /* Messing with block sizes and unchaining is a bit tricky
533 * since unchain_block() can splay(). So we really need
534 * to recalc_block_recursive() right after we update the size.
535 * If this place turns out to be a hot-spot, we can optimize
536 * the tree operations here. */
537 merger->t.size += block->t.size;
538 recalc_block_recursive(file_blocks(file), merger);
540 /* Destroy the block */
541 recalc_unchain_block(file_blocks(file), block);
542 file_free_block(file, block);
544 if (killprev)
545 file_kill_block(file, prev);
548 static struct file_block *
549 split_block(struct hed_file *file, struct hed_block *block,
550 hed_uoff_t splitpoint)
552 struct file_block *next;
554 next = new_block(file, block->flags);
555 if (!next)
556 return NULL;
558 if ( (next->dataobj = block->dataobj) ) {
559 cache_get(next->dataobj);
560 next->dataoff = block->dataoff + splitpoint;
561 } else
562 assert(block_is_virtual(block));
564 next->t.size = block->t.size - splitpoint;
565 next->phys_pos = block->phys_pos;
566 if (!block_is_inserted(block))
567 next->phys_pos += splitpoint;
569 block->t.size = splitpoint;
570 recalc_block_recursive(file_blocks(file), block);
571 recalc_chain_block_after(file_blocks(file), block, next);
573 move_blockoffs(block, next, splitpoint, UOFF_MAX, -splitpoint);
575 return next;
578 /* Replace a chunk in @block with @newblock */
579 static int
580 replace_chunk(struct hed_file *file, struct hed_block *block,
581 hed_uoff_t offset, struct hed_block *newblock)
583 size_t len = newblock->t.size;
584 hed_uoff_t leadlen = offset + len;
586 assert(len <= block->t.size - offset);
588 /* Re-create the tail block if necessary */
589 if (block_is_eof(block) || block->t.size - offset > len) {
590 struct file_block *tail;
592 tail = new_block(file, block->flags);
593 if (!tail)
594 return -1;
595 tail->t.size = block->t.size - leadlen;
596 tail->dataobj = block->dataobj;
597 tail->dataoff = block->dataoff + leadlen;
598 tail->phys_pos = block->phys_pos + leadlen;
600 block_clear_eof(block);
601 recalc_chain_block_after(file_blocks(file), block, tail);
603 /* Move offsets to the tail */
604 move_blockoffs(block, tail, leadlen, UOFF_MAX, -leadlen);
607 /* Move pointers to the new block */
608 assert(leadlen > 0);
609 move_blockoffs(block, newblock, offset, leadlen - 1, -offset);
611 /* Shorten the leading block */
612 block->t.size = offset;
613 recalc_block_recursive(file_blocks(file), block);
615 /* Insert the new block */
616 recalc_chain_block_after(file_blocks(file), block, newblock);
618 /* Kill the leading block if possible */
619 kill_block_if_empty(file, block);
621 return 0;
624 #ifdef HED_CONFIG_SWAP
626 static char *
627 swp_filename(const char *filename)
629 size_t fnlen = strlen(filename);
630 char *swp;
631 char *file;
633 if (!(swp = malloc(fnlen + 9)) )
634 return NULL;
635 strcpy(swp, filename);
637 file = strrchr(swp, '/');
638 file = file ? file + 1 : swp;
639 *file = '.';
640 strcpy(stpcpy(file + 1, filename + (file -swp)), ".hedswp");
641 return swp;
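/* Worked example (added for illustration): swp_filename("/some/dir/image.bin")
 * yields "/some/dir/.image.bin.hedswp", and swp_filename("image.bin")
 * yields ".image.bin.hedswp". */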
644 static char *
645 newswp_filename(char *swpname)
647 char *ret, *p;
649 ret = swpname;
650 while (!access(ret, F_OK)) {
651 if (ret == swpname) {
652 if (! (ret = strdup(swpname)) )
653 return NULL;
654 p = ret + strlen(ret) - 1;
657 if (*p == 'a') {
658 free(ret);
659 return NULL;
661 --*p;
663 return ret;
667 hed_file_remove_swap(struct hed_file *file)
669 if (remove(file->swpname))
670 return -1;
671 if (rename(file->newswpname, file->swpname))
672 return -1;
674 free(file->newswpname);
675 file->newswpname = file->swpname;
676 return 0;
679 static inline struct hed_file *
680 file_swp_init(const char *name)
682 char *swpname, *newswpname;
683 struct swp_file *swp;
684 struct hed_file *file;
686 swpname = swp_filename(name);
687 if (!swpname)
688 goto fail;
689 newswpname = newswp_filename(swpname);
690 if (!newswpname)
691 goto fail_free_name;
692 swp = swp_init_write(newswpname);
693 if (!swp)
694 goto fail_free_newname;
696 assert(sizeof(struct swp_header) + sizeof(struct hed_file)
697 <= FILE_BLOCK_SIZE);
698 file = swp_private(swp);
699 memset(file, 0, sizeof *file);
701 file->swp = swp;
702 file->swpname = swpname;
703 file->newswpname = newswpname;
705 return file;
707 fail_free_newname:
708 free(newswpname);
709 fail_free_name:
710 if (swpname != newswpname)
711 free(swpname);
712 fail:
713 return NULL;
716 static inline void
717 file_swp_done(struct hed_file *file)
719 remove(file->newswpname);
720 if (file->newswpname != file->swpname)
721 free(file->newswpname);
722 free(file->swpname);
723 swp_done(file_swp(file));
724 /* file was de-allocated automatically with file->swp */
727 static inline void
728 init_null_block(struct hed_file *file)
730 file->null_block = null_block(file_blocks(file));
733 #else /* HED_CONFIG_SWAP */
735 static inline struct hed_file *
736 file_swp_init(const char *name)
738 return calloc(1, sizeof(struct hed_file));
741 static inline void
742 file_swp_done(struct hed_file *file)
746 #define init_null_block(f) do {} while(0)
748 #endif /* HED_CONFIG_SWAP */
750 static inline struct stat *
751 file_stat(struct hed_file *file)
753 return &file->s;
757 hed_file_update_size(struct hed_file *file)
759 hed_uoff_t oldsize = file->phys_size;
761 if(fstat(file->fd, file_stat(file)) < 0)
762 return -1;
764 if (S_ISBLK(file_stat(file)->st_mode)) {
765 if (ioctl(file->fd, BLKGETSIZE64, &file->phys_size)) {
766 unsigned long size_in_blocks;
767 if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
768 return -1;
769 file->phys_size = (hed_uoff_t)size_in_blocks << 9;
771 } else if (S_ISREG(file_stat(file)->st_mode)) {
772 file->phys_size = file_stat(file)->st_size;
773 } else if (S_ISCHR(file_stat(file)->st_mode)) {
774 if (lseek(file->fd, 0, SEEK_SET) < 0)
775 return -1;
776 file->phys_size = OFF_MAX;
777 } else {
778 errno = EINVAL;
779 return -1;
782 file->size += file->phys_size - oldsize;
783 return 0;
786 static int
787 do_file_open(struct hed_file *file)
789 file->fd = open(file->name, O_RDONLY);
790 if (file->fd < 0) {
791 if (errno != ENOENT)
792 return errno;
793 fprintf(stderr, "Warning: File %s does not exist\n",
794 file->name);
795 memset(file_stat(file), 0, sizeof(struct stat));
797 } else if (hed_file_update_size(file)) {
798 int errsv = errno;
799 close(file->fd);
800 return errsv;
802 return 0;
805 static int
806 file_setup_blocks(struct hed_file *file)
808 hed_uoff_t phys_size = file->phys_size;
809 struct file_block *block;
810 struct file_block *vblock;
812 if (phys_size) {
813 block = new_virt_block(file, 0, phys_size, 0);
814 if (!block)
815 return -1;
816 chain_block(file_blocks(file), block);
819 vblock = new_virt_block(file, phys_size, OFF_MAX - phys_size + 1,
820 HED_BLOCK_EOF);
821 if (!vblock)
822 return -1;
823 recalc_chain_block(file_blocks(file), vblock);
825 return 0;
829 libhed_init(void)
831 return file_access_init();
834 struct hed_file *
835 hed_open(const char *name)
837 struct hed_file *file;
839 if (! (file = file_swp_init(name)) )
840 goto fail;
842 file->name = name;
844 init_block_list(file_blocks(file));
845 init_null_block(file);
847 file->cache = cache_init(CACHE_LENGTH, file_swp(file));
848 if (!file->cache)
849 goto fail_file;
850 INIT_LIST_HEAD(&file->lru);
852 if (do_file_open(file))
853 goto fail_file;
855 if (file_setup_blocks(file))
856 goto fail_file;
858 fixup_register(file);
860 return file;
862 fail_file:
863 hed_close(file);
864 fail:
865 return NULL;
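/* Typical use of the open/close API (an illustrative sketch; error handling
 * shortened, "image.bin" is just an example path, and libhed_init() is
 * assumed to follow the usual zero-on-success convention):
 *
 *	struct hed_file *f;
 *
 *	if (libhed_init())
 *		return 1;
 *	f = hed_open("image.bin");
 *	if (!f)
 *		return 1;
 *	...edit the file...
 *	hed_close(f);
 */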
868 void
869 hed_close(struct hed_file *file)
871 assert(file);
873 /* Do not check for errors:
874 * 1. This FD is read-only => no data loss possible
875 * 2. We're about to exit anyway => no resource leak
877 if (file->fd >= 0)
878 close(file->fd);
880 fixup_deregister(file);
882 /* No need to free file blocks here, because all data is
883 * allocated either from the cache or from the swap file
884 * and both are going to be destroyed now.
887 if (file->cache)
888 cache_done(file->cache);
890 file_swp_done(file);
893 /* Adjust blockoff after off gets outside its block */
894 static void
895 fixup_blockoff_slow(blockoff_t *blockoff)
897 struct file_block *block = blockoff->block;
898 hed_uoff_t off = blockoff->off;
900 do {
901 if ((hed_off_t)off < 0) {
902 block = prev_block(block);
903 off += block->t.size;
904 } else {
905 off -= block->t.size;
906 block = next_block(block);
908 } while (off >= block->t.size);
910 blockoff->block = block;
911 blockoff->off = off;
912 list_move(&blockoff->list, &block->refs);
915 /* Adjust blockoff if off gets outside its block.
916 * This is separate from fixup_blockoff_slow, because it is supposed
917 * to be small enough to be inlined (which is a win, because most of
918 * the time no fixup has to be done, so the fast inlined path is used).
920 static inline void
921 fixup_blockoff(blockoff_t *blockoff)
923 if (blockoff->off >= blockoff->block->t.size)
924 fixup_blockoff_slow(blockoff);
927 hed_off_t
928 hed_move_relative(blockoff_t *blockoff, hed_off_t num)
930 hed_off_t newpos = blockoff->pos + num;
931 if (newpos < 0) {
932 newpos = num < 0 ? 0 : OFF_MAX;
933 num = newpos - blockoff->pos;
935 blockoff->pos = newpos;
936 blockoff->off += num;
937 fixup_blockoff(blockoff);
938 return num;
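/* Example (added for illustration): with the cursor at position 3,
 * hed_move_relative(&curs, -10) clamps the new position to 0 and
 * returns -3, the distance actually moved. */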
941 /* Relative move with no checking (and only by a small amount) */
942 static inline void
943 move_rel_fast(blockoff_t *blockoff, ssize_t num)
945 blockoff->off += num;
946 blockoff->pos += num;
947 fixup_blockoff(blockoff);
950 static void
951 alloc_caches(struct hed_file *file, struct hed_block_data **caches, int n)
953 struct remap_control rc;
954 int i;
956 BDEBUG("Allocate %d caches (%d free slots available)\n",
957 n, file->cache->nfree);
959 assert(n <= CACHE_LENGTH);
960 while (file->cache->nfree < n) {
961 struct file_block *block;
963 assert(!list_empty(&file->lru));
964 block = list_entry(file->lru.next, struct file_block, lru);
965 BDEBUG("Killing block at physical %llx\n", block->phys_pos);
966 file_kill_block(file, block);
969 for (i = 0; i < n; ++i) {
970 caches[i] = cache_alloc(file->cache);
971 assert(caches[i]);
974 remap_init(&rc);
975 remap_compact(&rc, file->cache, caches, n);
976 for (i = 0; i < n; ++i)
977 remap_add(&rc, caches[i]->data);
978 remap_finish(&rc);
981 static inline void
982 free_caches(struct hed_file *file, struct hed_block_data **preload, int n)
984 int i;
986 for (i = 0; i < n; ++i)
987 if (preload[i])
988 cache_put(file->cache, preload[i]);
991 static int
992 file_load_data(struct hed_file *file,
993 struct hed_block_data **caches, int n,
994 hed_uoff_t offset)
996 struct hed_block_data *dataobj = caches[0];
997 void *data = dataobj->data;
998 ssize_t rsize, total, segsize;
1000 segsize = n << FILE_BLOCK_SHIFT;
1001 for (total = 0; total < segsize; total += rsize) {
1002 rsize = pread(file->fd, data + total,
1003 segsize - total, offset + total);
1004 if (!rsize)
1005 break;
1006 if (rsize < 0) {
1007 dataobj = caches[total >> FILE_BLOCK_SHIFT];
1008 caches[total >> FILE_BLOCK_SHIFT] = NULL;
1009 data = dataobj->data;
1010 cache_put(file->cache, dataobj);
1011 total = FILE_BLOCK_ROUND(total);
1012 rsize = FILE_BLOCK_SIZE;
1013 BDEBUG("Error reading block at phys %llx: %s\n",
1014 offset + total, strerror(errno));
1018 BDEBUG("Loaded data at phys %llx up to %llx\n",
1019 offset, offset + segsize);
1020 return 0;
1023 #ifdef HED_CONFIG_MMAP
1025 static int
1026 file_load_data_mmap(struct hed_file *file,
1027 struct hed_block_data **caches, int n,
1028 hed_uoff_t offset)
1030 void *data;
1031 ssize_t segsize;
1032 int i;
1034 segsize = n << FILE_BLOCK_SHIFT;
1035 data = mmap(NULL, segsize,
1036 PROT_READ | PROT_WRITE,
1037 MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
1038 file->fd, offset);
1040 if (data == MAP_FAILED) {
1041 BDEBUG("mmap failed at %llx: fail over to traditional read\n",
1042 offset);
1044 data = mmap(NULL, segsize,
1045 PROT_READ | PROT_WRITE,
1046 MAP_PRIVATE | MAP_ANONYMOUS,
1047 0, 0);
1048 if (data == MAP_FAILED)
1049 return -1;
1051 for (i = 0; i < n; ++i)
1052 caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
1053 return file_load_data(file, caches, n, offset);
1056 for (i = 0; i < n; ++i)
1057 caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
1059 BDEBUG("Loaded data at phys %llx up to %llx\n",
1060 offset, offset + segsize);
1061 return 0;
1063 # define file_load_data file_load_data_mmap
1065 #endif
1067 static int
1068 load_blocks(struct hed_file *file, const blockoff_t *from)
1070 hed_uoff_t physpos, segstart;
1071 struct hed_block_data *preload[FILE_READAHEAD];
1072 size_t ra_bkw, ra_fwd, ra_off;
1073 hed_cursor_t pos;
1074 int nblocks;
1076 segstart = hed_cursor_phys_pos(from);
1077 ra_bkw = FILE_BLOCK_OFF(segstart);
1078 ra_fwd = FILE_BLOCK_SIZE - ra_bkw;
1080 if (file_ra_forward(file))
1081 ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
1082 else if (file_ra_backward(file))
1083 ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
1085 if (ra_bkw > segstart)
1086 ra_bkw = segstart;
1087 if (ra_fwd > file->phys_size - segstart)
1088 ra_fwd = file->phys_size - segstart;
1090 segstart -= ra_bkw;
1091 ra_fwd += ra_bkw;
1092 pos.block = from->block;
1093 while (pos.block->phys_pos > segstart)
1094 pos.block = prev_block(pos.block);
1095 pos.off = segstart - pos.block->phys_pos;
1097 list_add(&pos.list, &pos.block->refs);
1098 nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
1099 alloc_caches(file, preload, nblocks);
1100 hed_put_cursor(&pos);
1102 if (file_load_data(file, preload, nblocks, segstart)) {
1103 free_caches(file, preload, nblocks);
1104 return -1;
1107 while (physpos = hed_cursor_phys_pos(&pos),
1108 ra_off = physpos - segstart,
1109 ra_off < ra_fwd) {
1110 struct hed_block_data *dataobj;
1111 struct hed_block *newblock;
1112 size_t datalen;
1114 if (!block_is_virtual(pos.block)) {
1115 pos.block = next_block(pos.block);
1116 pos.off = 0;
1117 continue;
1120 datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
1121 if (datalen > hed_block_size(pos.block) - pos.off)
1122 datalen = hed_block_size(pos.block) - pos.off;
1124 dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
1125 newblock = dataobj
1126 ? new_data_block(file, physpos, datalen, dataobj)
1127 : new_virt_block(file, physpos, datalen,
1128 HED_BLOCK_BAD);
1130 /* Punch the new block */
1131 BDEBUG("Add %s block at %llx, length %lx\n",
1132 hed_block_is_virtual(newblock) ? "error" : "physical",
1133 newblock->phys_pos, newblock->t.size);
1134 if (replace_chunk(file, pos.block, pos.off, newblock)) {
1135 file_free_block(file, newblock);
1136 free_caches(file, preload, nblocks);
1137 return -1;
1140 pos.block = next_block(newblock);
1141 pos.off = 0;
1144 /* All cache objects now have an extra reference from the
1145 * allocation. Drop it. */
1146 free_caches(file, preload, nblocks);
1148 dump_blocks(file);
1149 return 0;
1152 /* Shorten a block at the beginning and enlarge the preceding block.
1154 * Re-allocate at most @len bytes from the beginning of @block to the
1155 * end of the preceding block.
1156 * If @block is virtual, this will effectively devirtualize the range.
1157 * If @block is not virtual, this will change the backing store of
1158 * the bytes in the range.
1159 * Returns: the number of bytes actually moved.
1161 static size_t
1162 shrink_at_begin(struct hed_file *file, struct file_block *block,
1163 size_t len, long state)
1165 struct file_block *prev = prev_block(block);
1166 size_t maxgrow;
1168 /* Basic assumptions */
1169 assert(!(state & HED_BLOCK_VIRTUAL));
1171 /* The previous block must exist: */
1172 if (!is_a_block(file_blocks(file), prev))
1173 return 0;
1175 /* The block flags must match the requested @state: */
1176 if ((prev->flags & HED_BLOCK_STATEMASK) != state)
1177 return 0;
1179 /* No deletions at end, or similar: */
1180 if (prev->phys_pos + prev->t.size != block->phys_pos)
1181 return 0;
1183 /* Append fewer bytes than requested if not all are available */
1184 assert(prev->t.size <= prev->dataobj->size);
1185 maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
1186 if (len > maxgrow)
1187 len = maxgrow;
1188 if (!len)
1189 return 0;
1191 BDEBUG("Appending 0:%lx to the previous block\n", len);
1193 /* Move blockoffs away from the to-be-chopped beginning */
1194 move_blockoffs(block, prev, 0, len - 1, prev->t.size);
1196 /* Enlarge the previous block */
1197 prev->t.size += len;
1198 recalc_block_recursive(file_blocks(file), prev);
1200 /* Shorten the original block */
1201 block->t.size -= len;
1202 block->dataoff += len;
1203 block->phys_pos += len;
1204 recalc_block_recursive(file_blocks(file), block);
1205 return len;
1208 /* Shorten a block at the end and enlarge the following block.
1210 * Re-allocate at most @len bytes from the end of @block to the
1211 * beginning of the following block.
1212 * If @block is virtual, this will effectively devirtualize the range.
1213 * If @block is not virtual, this will change the backing store of
1214 * the bytes in the range.
1215 * Returns: the number of bytes actually moved.
1217 static size_t
1218 shrink_at_end(struct hed_file *file, struct file_block *block,
1219 size_t len, long state)
1221 struct file_block *next = next_block(block);
1222 hed_uoff_t off;
1224 /* Basic assumptions */
1225 assert(!(state & HED_BLOCK_VIRTUAL));
1227 /* The next block must exist: */
1228 if (!is_a_block(file_blocks(file), next))
1229 return 0;
1231 /* The block flags must match the requested @state: */
1232 if ((next->flags & HED_BLOCK_STATEMASK) != state)
1233 return 0;
1235 /* No deletions at end, or similar: */
1236 if (block->phys_pos + block->t.size != next->phys_pos)
1237 return 0;
1239 /* Prepend fewer bytes than requested if not all are available */
1240 if (len > next->dataoff)
1241 len = next->dataoff;
1242 if (!len)
1243 return 0;
1244 off = block->t.size - len;
1246 BDEBUG("Prepending %llx:%lx to the next block\n", off, len);
1248 /* Shift blockoffs in the new physical block */
1249 update_blockoffs(next, next, len);
1251 /* Move blockoffs away from the to-be-chopped end */
1252 move_blockoffs(block, next, off, UOFF_MAX, -off);
1254 /* Enlarge the next block */
1255 next->dataoff -= len;
1256 next->phys_pos -= len;
1257 next->t.size += len;
1258 recalc_block_recursive(file_blocks(file), next);
1260 /* Shorten the original block */
1261 block->t.size -= len;
1262 recalc_block_recursive(file_blocks(file), block);
1263 return len;
1266 /* Search for an existing data object within the same physical block
1267 * as @curs, and having the given @state flags.
1269 static struct hed_block_data *
1270 search_data(struct hed_file *file, const hed_cursor_t *curs, long state)
1272 struct file_block *block;
1273 hed_uoff_t physpos;
1275 physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
1276 BDEBUG("Search for already loaded data at %llx starting at %llx...",
1277 physpos, curs->block->phys_pos);
1279 /* Search backwards */
1280 block = prev_block(curs->block);
1281 while (is_a_block(file_blocks(file), block) &&
1282 block->phys_pos >= physpos) {
1283 if ((block->flags & HED_BLOCK_STATEMASK) == state) {
1284 BDEBUG(" found at %llx\n", block->phys_pos);
1285 assert(block->dataobj);
1286 return block->dataobj;
1288 block = prev_block(block);
1291 /* Search forwards */
1292 block = next_block(curs->block);
1293 while (is_a_block(file_blocks(file), block) &&
1294 block->phys_pos < physpos + FILE_BLOCK_SIZE) {
1295 if ((block->flags & HED_BLOCK_STATEMASK) == state) {
1296 BDEBUG(" found at %llx\n", block->phys_pos);
1297 assert(block->dataobj);
1298 return block->dataobj;
1300 block = next_block(block);
1303 BDEBUG(" not found\n");
1304 return NULL;
1307 static int
1308 reuse_loaded_data(struct hed_file *file, const blockoff_t *blockoff,
1309 struct hed_block_data *data)
1311 struct file_block *physblock;
1312 struct file_block *block = blockoff->block;
1313 hed_uoff_t block_offset = blockoff->off;
1314 hed_uoff_t physpos = block->phys_pos + block_offset;
1315 size_t part = FILE_BLOCK_OFF(physpos);
1316 size_t len =
1317 FILE_BLOCK_SIZE - part <= block->t.size - block_offset
1318 ? FILE_BLOCK_SIZE - part
1319 : block->t.size - block_offset;
1321 if (block->phys_pos <= physpos - part) {
1322 physpos -= part;
1323 len += part;
1324 } else {
1325 physpos -= block_offset;
1326 len += block_offset;
1329 if (! (physblock = new_data_block(file, physpos, len, data)) )
1330 return -1;
1332 BDEBUG("Add physical block at %llx, length %lx\n",
1333 physblock->phys_pos, physblock->t.size);
1334 if (replace_chunk(file, block, block_offset, physblock)) {
1335 file_free_block(file, physblock);
1336 return -1;
1339 dump_blocks(file);
1340 return 0;
1343 /* Replace a part of a virtual block with content loaded
1344 * from disk. The amount of data loaded from the disk depends
1345 * on various factors with the goal to choose the most efficient
1346 * ratio. The only guarantee is that the byte at @blockoff will
1347 * be in a non-virtual block when this function returns 0.
1349 static int
1350 devirtualize_clean(struct hed_file *file, const blockoff_t *blockoff)
1352 struct file_block *block = blockoff->block;
1353 hed_uoff_t block_offset = blockoff->off;
1354 hed_uoff_t remain = block->t.size - block_offset;
1355 struct hed_block_data *data;
1357 BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
1358 block_offset(file_blocks(file), block), block->t.size);
1359 assert(block_is_virtual(block));
1361 /* Check if we can combine with a neighbouring block */
1362 if (shrink_at_begin(file, block, SIZE_MAX, 0) > block_offset ||
1363 shrink_at_end(file, block, SIZE_MAX, 0) >= remain) {
1364 kill_block_if_empty(file, block);
1365 dump_blocks(file);
1366 return 0;
1369 /* Check if the block is already loaded elsewhere */
1370 data = search_data(file, blockoff, 0);
1371 return data
1372 ? reuse_loaded_data(file, blockoff, data)
1373 : load_blocks(file, blockoff);
1376 /* Replace at most @len bytes of a virtual block with a newly
1377 * allocated out-of-cache block. The block is marked dirty
1378 * and its data is left uninitialized.
1379 * If the block at @blockoff is not virtual, make it dirty.
1380 * Note that this function may devirtualize fewer than @len bytes.
1381 * In the worst case only 1 byte at @blockoff will be available.
1383 static int
1384 prepare_modify(struct hed_file *file, blockoff_t *blockoff, size_t len)
1386 struct file_block *block = blockoff->block;
1387 hed_uoff_t block_offset = blockoff->off;
1388 hed_uoff_t remain = block->t.size - block_offset;
1389 struct file_block *newblock;
1391 if (block_is_dirty(block))
1392 return 0;
1394 if (len > remain)
1395 len = remain;
1397 BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
1398 block_offset, len,
1399 block_offset(file_blocks(file), block), block->t.size);
1401 /* Check if we can combine with a neighbouring block */
1402 if ((block_offset == 0 &&
1403 shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
1404 (remain == len &&
1405 shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
1406 kill_block_if_empty(file, block);
1407 dump_blocks(file);
1408 return 0;
1411 /* Initialize a new block */
1412 newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
1413 if (!newblock)
1414 goto out_err;
1416 /* Allocate data */
1417 if ( (newblock->dataobj = search_data(file, blockoff,
1418 HED_BLOCK_DIRTY)) )
1419 cache_get(newblock->dataobj);
1420 else if (! (newblock->dataobj = block_data_new(file->cache,
1421 FILE_BLOCK_SIZE)) )
1422 goto out_err_free;
1424 newblock->phys_pos = block->phys_pos + block_offset;
1425 newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
1426 if (len > FILE_BLOCK_SIZE - newblock->dataoff)
1427 len = FILE_BLOCK_SIZE - newblock->dataoff;
1428 newblock->t.size = len;
1430 if (replace_chunk(file, block, block_offset, newblock))
1431 goto out_err_free;
1433 dump_blocks(file);
1434 return 0;
1436 out_err_free:
1437 file_free_block(file, newblock);
1438 out_err:
1439 return -1;
1442 /* Ensure that @curs points to an up-to-date non-virtual block, loading
1443 * data from disk if necessary. Returns the usable chunk length, or 0 on failure. */
1444 size_t
1445 hed_prepare_read(struct hed_file *file, const hed_cursor_t *curs, size_t len)
1447 struct file_block *block = curs->block;
1448 if (hed_block_is_inner_virtual(block) &&
1449 devirtualize_clean(file, curs) < 0)
1450 return 0;
1452 return hed_cursor_chunk_len(curs, len);
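/* Usage note (added): the canonical consumer is copy_in() below -- call
 * hed_prepare_read() in a loop; each call returns the length of the
 * contiguous chunk available at @curs, whose bytes can be copied from
 * block_data(curs->block) + curs->off (virtual EOF chunks read back as
 * zeroes in copy_in()). */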
1455 /* Move the block pointer to the next block */
1456 static int
1457 blockoff_next_block(struct hed_file *file, blockoff_t *blockoff)
1459 struct file_block *block = blockoff->block;
1461 do {
1462 block = next_block(block);
1463 if (!is_a_block(file_blocks(file), block))
1464 return -1;
1465 } while (!block->t.size);
1467 blockoff->block = block;
1468 blockoff->off = 0;
1469 list_move(&blockoff->list, &block->refs);
1470 return 0;
1473 /* This is an optimized way of doing:
1475 * hed_move_relative(blockoff, blockoff->block->t.size);
1477 * for the case when blockoff->off == 0.
1479 static int
1480 move_to_next(struct hed_file *file, blockoff_t *blockoff)
1482 blockoff->pos += blockoff->block->t.size;
1483 return blockoff_next_block(file, blockoff);
1486 /* Copy in @count bytes from @pos.
1487 * Returns the number of bytes that were not read (i.e. zero on success).
1488 * The @pos blockoff is moved by the amount of data read.
1489 * CAUTION: If you read up to EOF, then @pos points to the null_block
1490 * upon return.
1492 static size_t
1493 copy_in(struct hed_file *file, void *buf, size_t count, blockoff_t *pos)
1495 size_t cpylen;
1497 assert(is_a_block(file_blocks(file), pos->block));
1499 pos->pos += count;
1500 while (count && (cpylen = hed_prepare_read(file, pos, count))) {
1501 if (block_is_virtual(pos->block))
1502 memset(buf, 0, cpylen);
1503 else
1504 memcpy(buf, block_data(pos->block) + pos->off, cpylen);
1506 buf += cpylen;
1507 count -= cpylen;
1508 if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
1509 if (blockoff_next_block(file, pos))
1510 break;
1512 pos->pos -= count;
1513 return count;
1516 size_t
1517 hed_file_cpin(struct hed_file *file, void *buf, size_t count,
1518 const hed_cursor_t *pos)
1520 blockoff_t mypos;
1521 size_t ret;
1523 hed_dup_cursor(pos, &mypos);
1524 ret = copy_in(file, buf, count, &mypos);
1525 hed_put_cursor(&mypos);
1526 return ret;
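/* Illustrative read example: copy 16 bytes starting at offset 0x100.
 * hed_file_cpin() returns the number of bytes it could NOT read, so 0
 * means complete success:
 *
 *	unsigned char buf[16];
 *	hed_cursor_t curs;
 *
 *	hed_get_cursor(file, 0x100, &curs);
 *	if (hed_file_cpin(file, buf, sizeof(buf), &curs) != 0)
 *		...short read (EOF reached or read error)...
 *	hed_put_cursor(&curs);
 */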
1529 /* Set the modified flag */
1530 static inline void
1531 set_modified(struct hed_file *file)
1533 file->modified = true;
1536 /* Clear the modified flag */
1537 static inline void
1538 clear_modified(struct hed_file *file)
1540 file->modified = false;
1544 hed_file_set_byte(struct hed_file *file, blockoff_t *blockoff,
1545 unsigned char byte)
1547 hed_uoff_t offset = blockoff->pos;
1549 if (prepare_modify(file, blockoff, 1))
1550 return -1;
1551 set_modified(file);
1553 if (offset >= file->size)
1554 file->size = offset + 1;
1556 block_data(blockoff->block)[blockoff->off] = byte;
1557 return 0;
1560 size_t
1561 hed_file_set_block(struct hed_file *file, blockoff_t *blockoff,
1562 unsigned char *buf, size_t len)
1564 while (len) {
1565 size_t span;
1567 if (prepare_modify(file, blockoff, len))
1568 break;
1569 set_modified(file);
1571 span = hed_cursor_chunk_len(blockoff, len);
1572 memcpy(block_data(blockoff->block) + blockoff->off,
1573 buf, span);
1574 buf += span;
1575 len -= span;
1576 move_rel_fast(blockoff, span);
1578 if (blockoff->pos > file->size)
1579 file->size = blockoff->pos;
1581 return len;
1584 hed_uoff_t
1585 hed_file_set_bytes(struct hed_file *file, blockoff_t *blockoff,
1586 unsigned char byte, hed_uoff_t rep)
1588 while (rep) {
1589 size_t span;
1591 if (prepare_modify(file, blockoff, rep))
1592 break;
1593 set_modified(file);
1595 span = hed_cursor_chunk_len(blockoff, rep);
1596 memset(block_data(blockoff->block) + blockoff->off,
1597 byte, span);
1598 rep -= span;
1599 move_rel_fast(blockoff, span);
1601 if (blockoff->pos > file->size)
1602 file->size = blockoff->pos;
1604 return rep;
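/* Illustrative overwrite example: fill 16 bytes at the cursor with 0xff.
 * Like the other block operations, the return value is the number of
 * bytes that could not be written:
 *
 *	if (hed_file_set_bytes(file, &curs, 0xff, 16))
 *		...allocation failed part-way...
 */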
1607 static int
1608 file_erase_continuous(struct hed_file *file, blockoff_t *blockoff, size_t len)
1610 struct file_block *block = blockoff->block;
1611 hed_uoff_t block_offset = blockoff->off;
1612 blockoff_t *cur;
1614 /* Move to the new position, so blockoff does not get destroyed */
1615 hed_move_relative(blockoff, len);
1617 assert(len > 0);
1618 nuke_blockoffs(block, block_offset,
1619 block_offset + len - (len < block->t.size));
1621 if (!block_offset) {
1622 block->dataoff += len;
1623 if (!block_is_inserted(block))
1624 block->phys_pos += len;
1625 } else if (block_offset + len < block->t.size &&
1626 !split_block(file, block, block_offset + len))
1627 return -1;
1629 move_blockoffs(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);
1631 block->t.size -= len;
1632 recalc_block_recursive(file_blocks(file), block);
1634 /* Move the insert point if needed */
1635 list_for_each_entry(cur, &block->refs, list)
1636 if (cur->off >= block->t.size)
1637 cur->pos -= len;
1639 kill_block_if_empty(file, block);
1640 return 0;
1643 size_t
1644 hed_file_erase_block(struct hed_file *file, blockoff_t *blockoff,
1645 hed_uoff_t len)
1647 hed_uoff_t offset;
1648 hed_uoff_t todo;
1649 struct file_block *eofblock;
1651 offset = blockoff->pos;
1652 if (offset > file_size(file))
1653 return 0;
1654 else if (len > file_size(file) - offset)
1655 len = file_size(file) - offset;
1657 todo = len;
1658 while (todo) {
1659 size_t span = hed_cursor_chunk_len(blockoff, todo);
1661 if (file_erase_continuous(file, blockoff, span))
1662 break;
1664 todo -= span;
1666 len -= todo;
1668 file->size -= len;
1669 set_modified(file);
1671 eofblock = last_block(file_blocks(file));
1672 assert(block_is_virtual(eofblock));
1673 assert(block_is_eof(eofblock));
1674 eofblock->t.size += len;
1675 recalc_block_recursive(file_blocks(file), eofblock);
1677 slide_blockoffs(file, blockoff->block, blockoff->pos, -len);
1679 return todo;
1683 hed_file_insert_begin(struct hed_file *file, const hed_cursor_t *curs,
1684 hed_cursor_t *curs_ins)
1686 struct file_block *block, *newblock;
1688 BDEBUG("Starting insert at %llx\n", curs->pos);
1690 newblock = new_block(file,
1691 HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
1692 if (!newblock)
1693 return -1;
1695 newblock->phys_pos = hed_cursor_phys_pos(curs);
1696 newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
1697 if (!newblock->dataobj) {
1698 file_free_block(file, newblock);
1699 return -1;
1702 if (curs->off) {
1703 if (!split_block(file, curs->block, curs->off)) {
1704 file_free_block(file, newblock);
1705 return -1;
1707 block = curs->block;
1708 } else
1709 block = prev_block(curs->block);
1711 chain_block_after(file_blocks(file), block, newblock);
1713 curs_ins->pos = curs->pos;
1714 curs_ins->off = newblock->t.size;
1715 curs_ins->block = newblock;
1716 list_add(&curs_ins->list, &newblock->refs);
1718 dump_blocks(file);
1719 return 0;
1722 void
1723 hed_file_insert_end(struct hed_file *file, blockoff_t *blockoff_ins)
1725 struct file_block *block = blockoff_ins->block;
1727 BDEBUG("End insert at %llx\n", blockoff_ins->pos);
1729 blockoff_ins->block = NULL;
1730 list_del(&blockoff_ins->list);
1731 if (!kill_block_if_empty(file, block))
1732 block_data_shrink(file->cache, block->dataobj, block->t.size);
1734 dump_blocks(file);
1737 static void
1738 insert_block(struct hed_file *file, blockoff_t *blockoff,
1739 unsigned char *buf, size_t len)
1741 struct file_block *block = blockoff->block;
1742 hed_uoff_t offset = blockoff->pos;
1744 assert(file && offset >= 0);
1746 assert(block_is_excache(block));
1747 block_set_dirty(block);
1748 set_modified(file);
1750 memcpy(block_data(block) + blockoff->off, buf, len);
1751 block->t.size += len;
1752 recalc_block_recursive(file_blocks(file), block);
1753 blockoff->off += len;
1754 blockoff->pos += len;
1756 if (blockoff->pos > file->size)
1757 file->size = blockoff->pos;
1758 else
1759 file->size += len;
1761 slide_blockoffs(file, next_block(block), offset, len);
1764 size_t
1765 hed_file_insert_block(struct hed_file *file, blockoff_t *blockoff,
1766 unsigned char *buf, size_t len)
1768 while (len) {
1769 struct file_block *block = blockoff->block;
1770 size_t remain = block->dataobj->size - blockoff->off;
1772 if (!remain) {
1773 list_del(&blockoff->list);
1774 blockoff->block = next_block(block);
1775 blockoff->off = 0;
1777 if (!hed_file_insert_begin(file, blockoff, blockoff))
1778 continue;
1780 blockoff->block = block;
1781 blockoff->off = block->t.size;
1782 list_add(&blockoff->list, &block->refs);
1783 break;
1786 if (remain > len)
1787 remain = len;
1789 BDEBUG("Append %ld bytes to the insert block\n",
1790 (long) remain);
1791 insert_block(file, blockoff, buf, remain);
1792 buf += remain;
1793 len -= remain;
1795 return len;
1799 hed_file_insert_byte(struct hed_file *file, blockoff_t *blockoff,
1800 unsigned char byte)
1802 return hed_file_insert_block(file, blockoff, &byte, 1);
1805 size_t
1806 hed_file_insert_once(struct hed_file *file, blockoff_t *blockoff,
1807 unsigned char *buf, size_t len)
1809 blockoff_t insert;
1811 if (!hed_file_insert_begin(file, blockoff, &insert)) {
1812 len = hed_file_insert_block(file, &insert, buf, len);
1813 hed_file_insert_end(file, &insert);
1815 return len;
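/* Illustrative insert example: insert a byte string at the cursor position.
 * hed_file_insert_once() wraps the begin/block/end sequence above and
 * returns the number of bytes that could not be inserted:
 *
 *	static unsigned char msg[] = "patched";
 *
 *	if (hed_file_insert_once(file, &curs, msg, sizeof(msg) - 1))
 *		...insertion failed or was partial...
 */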
1818 struct commit_control {
1819 struct hed_file *file;
1820 int wfd; /* file descriptor for writing */
1821 int needwrite; /* non-zero if write is needed */
1822 blockoff_t begoff, endoff;
1823 hed_off_t shift;
1824 void *partbuf; /* allocated 3*FILE_BLOCK_SIZE */
1825 void *partial; /* pointer into partbuf */
1828 /* Get the logical<->physical shift value after the specified block.
1829 * It is essential to get the value _AFTER_ the block, because the
1830 * shift value is used to decide how the current block will be handled.
1832 static hed_off_t
1833 get_shift(const blockoff_t *blockoff)
1835 struct file_block *block = blockoff->block;
1836 size_t curshift = block_is_inserted(block) ? block->t.size : 0;
1837 return curshift +
1838 blockoff->pos - blockoff->off - block->phys_pos;
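/* In other words (added explanation): a positive shift means the data that
 * follows must move to higher physical offsets, so that region has to be
 * written back-to-front (commit_backwards) to avoid overwriting bytes that
 * have not been copied yet; a zero or negative shift is safe to commit
 * front-to-back (commit_forwards). */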
1841 static void
1842 switch_partial(struct commit_control *cc)
1844 cc->partial += FILE_BLOCK_SIZE;
1845 if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
1846 cc->partial = cc->partbuf;
1849 /* Write @len bytes from the partial buffer, ending at @cc->begoff. */
1850 static int
1851 commit_block(struct commit_control *cc, size_t len)
1853 ssize_t written;
1855 assert(len > 0);
1856 BDEBUG(" -> write %lx bytes at %llx\n",
1857 (unsigned long)len, cc->begoff.pos - len);
1858 written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
1859 if (written < 0 || (size_t)written < len)
1860 /* TODO: keep data in a new list of dirty blocks */
1861 return -1;
1862 return 0;
1865 static int
1866 commit_partial(struct commit_control *cc)
1868 size_t partoff, remain, left;
1869 size_t writelen;
1871 partoff = FILE_BLOCK_OFF(cc->begoff.pos);
1872 remain = FILE_BLOCK_SIZE - partoff;
1873 if (remain > cc->endoff.pos - cc->begoff.pos)
1874 remain = cc->endoff.pos - cc->begoff.pos;
1875 if ((writelen = partoff + remain) == 0)
1876 return 0;
1878 BDEBUG("Fill partial %llx-%llx\n",
1879 cc->begoff.pos, cc->begoff.pos + remain);
1881 left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
1882 if (left) {
1883 hed_move_relative(&cc->begoff, left);
1884 return -1;
1887 if (FILE_BLOCK_OFF(cc->begoff.pos) && !block_is_eof(cc->begoff.block))
1888 return 0;
1890 return commit_block(cc, writelen);
1893 /* Commit forwards.
1894 * Beware, cc->begoff is undefined upon return!
1896 static int
1897 commit_forwards(struct commit_control *cc)
1899 hed_uoff_t endpos = cc->endoff.pos;
1900 int ret = 0;
1902 BDEBUG("Writing forwards %llx-%llx\n",
1903 cc->begoff.pos, cc->endoff.pos);
1905 if (!cc->needwrite)
1906 return 0;
1908 while (cc->begoff.pos < endpos)
1909 ret |= commit_partial(cc);
1911 return ret;
1914 /* Commit backwards.
1915 * Beware, cc->begoff is undefined upon return!
1917 static int
1918 commit_backwards(struct commit_control *cc)
1920 void *retpartial = cc->partial;
1921 hed_uoff_t begpos = cc->begoff.pos;
1922 hed_uoff_t blkpos; /* start of current partial block */
1923 int ret = 0;
1925 BDEBUG("Writing backwards %llx-%llx\n",
1926 cc->begoff.pos, cc->endoff.pos);
1928 if (!cc->needwrite)
1929 return 0;
1931 blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
1932 if (blkpos <= begpos)
1933 goto final;
1935 /* Handle the trailing partial block */
1936 hed_get_cursor(cc->file, blkpos, &cc->begoff);
1937 switch_partial(cc);
1938 ret |= commit_partial(cc);
1939 retpartial = cc->partial;
1941 /* Handle the middle part */
1942 switch_partial(cc);
1943 while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
1944 hed_get_cursor(cc->file, blkpos, &cc->begoff);
1945 ret |= commit_partial(cc);
1947 switch_partial(cc); /* wrap around */
1949 final:
1950 /* Handle the first block (partial or not) */
1951 hed_get_cursor(cc->file, begpos, &cc->begoff);
1952 ret |= commit_partial(cc);
1954 cc->partial = retpartial;
1955 return ret;
1958 /* Handle the partial block before a skipped one. */
1959 static int
1960 begin_skip(struct commit_control *cc)
1962 size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
1963 size_t remain;
1964 int ret = 0;
1966 /* Check if at least one complete physical block can be skipped */
1967 if (cc->endoff.block->t.size < minsize)
1968 return 0;
1970 /* Write out the partially dirty block */
1971 remain = FILE_BLOCK_OFF(minsize);
1972 hed_move_relative(&cc->endoff, remain);
1973 if (cc->shift <= 0)
1974 ret |= commit_forwards(cc);
1975 else
1976 ret |= commit_backwards(cc);
1977 hed_move_relative(&cc->endoff, -(hed_off_t)remain);
1978 hed_dup2_cursor(&cc->endoff, &cc->begoff);
1980 cc->needwrite = 0;
1981 return ret;
1984 /* Handle the last partially skipped physical block. */
1985 static int
1986 end_skip(struct commit_control *cc)
1988 size_t partlen;
1989 int ret = 0;
1991 /* Find the beginning of the physical block */
1992 hed_dup2_cursor(&cc->endoff, &cc->begoff);
1993 partlen = FILE_BLOCK_OFF(cc->begoff.pos);
1994 hed_move_relative(&cc->begoff, -(hed_off_t)partlen);
1996 /* Read the partial data before this block */
1997 if (hed_file_cpin(cc->file, cc->partial, partlen, &cc->begoff))
1998 ret = -1;
2000 cc->needwrite = 1;
2001 return ret;
2004 static void
2005 undirty_blocks(struct hed_file *file)
2007 struct file_block *block, *next;
2008 hed_uoff_t pos = 0;
2010 BDEBUG("Undirtying blocks:\n");
2011 dump_blocks(file);
2013 foreachsafe_block (block, next, file_blocks(file)) {
2014 if (kill_block_if_empty(file, block))
2015 continue;
2017 if (!block_is_virtual(block)) {
2018 cache_put(file->cache, block->dataobj);
2019 block->dataobj = NULL;
2020 list_del_init(&block->lru);
2021 block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
2024 block->phys_pos = pos;
2025 pos += block->t.size;
2028 block = first_block(file_blocks(file));
2029 while (!block_is_eof(block)) {
2030 next = next_block(block);
2031 file_kill_block(file, block);
2032 block = next;
2035 BDEBUG("After undirtying\n");
2036 dump_blocks(file);
2039 static int
2040 commit_init(struct commit_control *cc, struct hed_file *file)
2042 cc->file = file;
2044 cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
2045 if (!cc->partbuf)
2046 goto err;
2048 cc->wfd = open(file->name,
2049 O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
2050 if (cc->wfd < 0)
2051 goto err_free;
2053 if (file->fd < 0 &&
2054 (file->fd = open(file->name, O_RDONLY)) < 0)
2055 goto err_close;
2057 return 0;
2059 err_close:
2060 close(cc->wfd);
2061 err_free:
2062 free(cc->partbuf);
2063 err:
2064 return -1;
2068 hed_file_commit(struct hed_file *file)
2070 struct commit_control cc;
2071 int ret = 0;
2073 if (commit_init(&cc, file))
2074 return -1;
2076 dump_blocks(file);
2078 cc.partial = cc.partbuf;
2079 get_cursor(file, 0, &cc.begoff);
2080 hed_dup_cursor(&cc.begoff, &cc.endoff);
2081 cc.shift = -cc.begoff.block->phys_pos;
2082 cc.needwrite = 0;
2083 while(!block_is_eof(cc.endoff.block)) {
2084 hed_off_t newshift = cc.endoff.pos < file->phys_size
2085 ? get_shift(&cc.endoff)
2086 : 0;
2088 if (cc.shift <= 0 && newshift > 0) {
2089 ret |= commit_forwards(&cc);
2090 hed_dup2_cursor(&cc.endoff, &cc.begoff);
2091 } else if (cc.shift > 0 && newshift <= 0) {
2092 ret |= commit_backwards(&cc);
2093 hed_dup2_cursor(&cc.endoff, &cc.begoff);
2095 cc.shift = newshift;
2097 if (!newshift && !block_is_dirty(cc.endoff.block)) {
2098 if (cc.needwrite)
2099 ret |= begin_skip(&cc);
2100 } else if (!cc.needwrite)
2101 ret |= end_skip(&cc);
2103 if (move_to_next(file, &cc.endoff))
2104 break;
2106 assert(cc.endoff.pos == file_size(file));
2108 if (cc.begoff.pos < file_size(file)) {
2109 if (cc.shift <= 0)
2110 ret |= commit_forwards(&cc);
2111 else
2112 ret |= commit_backwards(&cc);
2115 hed_put_cursor(&cc.begoff);
2116 hed_put_cursor(&cc.endoff);
2118 ftruncate(cc.wfd, file_size(file));
2119 file->phys_size = file_size(file);
2121 ret |= close(cc.wfd);
2122 free(cc.partbuf);
2124 undirty_blocks(file);
2126 if (!ret)
2127 clear_modified(file);
2129 return ret;
2132 #ifdef HED_CONFIG_SWAP
2134 hed_file_write_swap(struct hed_file *file)
2136 return swp_write(file_swp(file));
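/*
 * Replay the editing session recorded in a swap file onto @file.  If the
 * underlying file changed since the swap was written (size or mtime
 * mismatch), recovery is refused.  Otherwise the recorded block list is
 * copied in descriptor by descriptor with swp_cpin(); a jump in the
 * recorded physical positions is replayed as an erase, clean blocks merely
 * advance the cursor, and dirty blocks are re-applied with
 * hed_file_insert_once() (inserted data) or hed_file_set_block()
 * (overwritten data).
 */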
2139 static int
2140 do_read_swap(struct hed_file *file, struct swp_file *swp, blockoff_t *pos)
2142 struct hed_file *swpfile = swp_private(swp);
2143 struct file_block *cur, block;
2144 hed_uoff_t phys_pos;
2146 if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
2147 file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
2148 fprintf(stderr, "stat info mismatch (you modified the file since hed ran on it; refusing to touch it)\n");
2149 return -1;
2152 BDEBUG("Swap header match\n");
2154 phys_pos = 0;
2155 for (cur = first_block(file_blocks(swpfile));
2156 cur != swpfile->null_block; cur = next_block(&block)) {
2157 struct hed_block_data dataobj;
2158 size_t (*mergefn)(struct hed_file*, blockoff_t*,
2159 unsigned char*, size_t);
2160 void *data;
2161 size_t res;
2163 if (swp_cpin(swp, &block, cur, sizeof(struct file_block))) {
2164 perror("Cannot read block descriptor");
2165 return -1;
2167 BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
2168 cur, block.flags, (long long)block.phys_pos,
2169 (long long)hed_block_size(&block));
2171 if (block.phys_pos != phys_pos) {
2172 if (hed_file_erase_block(file, pos,
2173 block.phys_pos - phys_pos)) {
2174 perror("Cannot erase");
2175 return -1;
2177 phys_pos = block.phys_pos;
2180 if (!block_is_inserted(&block))
2181 phys_pos += hed_block_size(&block);
2183 if (!block_is_dirty(&block)) {
2184 hed_move_relative(pos, hed_block_size(&block));
2185 continue;
2188 if (swp_cpin(swp, &dataobj, block.dataobj,
2189 sizeof(struct hed_block_data))) {
2190 perror("Cannot read data descriptor");
2191 return -1;
2193 BDEBUG("DATA %p: size 0x%lx\n",
2194 block.dataobj, (long)dataobj.size);
2196 if (! (data = malloc(hed_block_size(&block))) ) {
2197 perror("Cannot allocate data");
2198 return -1;
2201 if (swp_cpin(swp, data, dataobj.data + block.dataoff,
2202 hed_block_size(&block))) {
2203 perror("Cannot read data");
2204 return -1;
2207 mergefn = block_is_inserted(&block)
2208 ? hed_file_insert_once
2209 : hed_file_set_block;
2210 res = mergefn(file, pos, data, hed_block_size(&block));
2211 free(data);
2212 if (res) {
2213 perror("Cannot merge data");
2214 return -1;
2218 dump_blocks(file);
2219 return 0;
2223 hed_file_read_swap(struct hed_file *file)
2225 struct swp_file *swp;
2226 blockoff_t pos;
2227 int ret;
2229 if (! (swp = swp_init_read(file->swpname)) )
2230 return -1;
2232 get_cursor(file, 0, &pos);
2233 ret = do_read_swap(file, swp, &pos);
2234 hed_put_cursor(&pos);
2236 swp_done(swp);
2237 return ret;
2240 #endif /* HED_CONFIG_SWAP */
2242 struct ffb_hookdata {
2243 struct hed_file *file;
2244 blockoff_t *pos;
2245 hed_expr_reg_cb base_ecb;
2246 void *base_ecb_data;
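/*
 * Register-evaluation callback used while searching: the '.' register
 * yields the file bytes at the given offset relative to the current
 * candidate position (read through a temporary cursor), and the
 * HED_AEF_DYNAMIC result marks the expression value as position dependent,
 * which is what forces find_expr() below to re-evaluate it at every
 * offset.  Any other register is handed to the caller-supplied callback.
 */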
2249 static long
2250 eval_reg_cb(void *hookdata, char reg, hed_off_t ofs,
2251 unsigned char *scramble, size_t len)
2253 struct ffb_hookdata *data = hookdata;
2254 if (reg == '.') {
2255 blockoff_t pos;
2256 long ret = HED_AEF_DYNAMIC;
2258 hed_dup_cursor(data->pos, &pos);
2259 hed_move_relative(&pos, ofs);
2260 if (copy_in(data->file, scramble, len, &pos))
2261 ret = HED_AEF_ERROR;
2262 hed_put_cursor(&pos);
2263 return ret;
2266 return data->base_ecb(data->base_ecb_data, reg, ofs, scramble, len);
2269 static void
2270 reverse(unsigned char *p, size_t len)
2272 unsigned char *q = p + len;
2273 while (p < q) {
2274 unsigned char x = *p;
2275 *p++ = *--q;
2276 *q = x;
2280 static void
2281 compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
2283 ssize_t i = 1;
2284 while (i < len)
2285 badchar[*s++] = i++;
2288 static void
2289 compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
2291 ssize_t f, g, i;
2293 sfx[len - 1] = len;
2294 g = len - 1;
2295 for (i = len - 2; i >= 0; --i) {
2296 if (i > g && sfx[i + len - 1 - f] < i - g)
2297 sfx[i] = sfx[i + len - 1 - f];
2298 else {
2299 if (i < g)
2300 g = i;
2301 f = i;
2302 while (g >= 0 && s[g] == s[g + len - 1 - f])
2303 --g;
2304 sfx[i] = f - g;
2309 static void
2310 compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
2312 ssize_t i, j, *sfx = goodsfx + len;
2314 compute_sfx(sfx, s, len);
2316 for (i = 0; i < len; ++i)
2317 goodsfx[i] = len;
2318 j = 0;
2319 for (i = len - 1; i >= 0; --i)
2320 if (sfx[i] == i + 1)
2321 for (; j < len - 1 - i; ++j)
2322 if (goodsfx[j] == len)
2323 goodsfx[j] = len - 1 - i;
2324 for (i = 0; i <= len - 2; ++i)
2325 goodsfx[len - 1 - sfx[i]] = len - 1 - i;
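/*
 * Shift-table semantics for the Boyer-Moore helpers below: badchar[c] is
 * the 1-based index of the last occurrence of byte c among the first
 * len - 1 needle bytes (0 when the byte does not occur), so
 * "i + 1 - badchar[c]" is the classic bad-character shift for a mismatch
 * at needle index i; goodsfx[i] is the matching good-suffix shift.  The
 * goodsfx buffer must hold 2*len entries, the upper half being scratch
 * space for compute_sfx().  For example, for the needle "abab",
 * compute_badchar() produces badchar['a'] = 3, badchar['b'] = 2 and 0 for
 * every other byte, so a mismatch at the last needle position against a
 * byte that never occurs in the needle shifts by the full needle length.
 */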
2328 /* Search for a constant byte string using the Boyer-Moore algorithm. */
2329 static inline unsigned char*
2330 bm_find(unsigned char *buf, ssize_t buflen, unsigned char *needle,
2331 ssize_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2333 while (buflen > maxidx) {
2334 unsigned char *p;
2335 ssize_t shift, i;
2337 for (p = buf + maxidx, i = maxidx; i >= 0; --p, --i)
2338 if (needle[i] != *p)
2339 break;
2340 if (i < 0)
2341 return buf;
2343 shift = i + 1 - badchar[*p];
2344 if (shift < goodsfx[i])
2345 shift = goodsfx[i];
2347 buf += shift;
2348 buflen -= shift;
2350 return NULL;
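/*
 * Minimal usage sketch for the helpers above; illustrative only and
 * compiled out, with made-up buffer contents (bm_example is not part of
 * the original file).
 */
#if 0
static void bm_example(void)
{
	static unsigned char haystack[] = "the quick brown fox";
	static unsigned char needle[] = "brown";
	ssize_t nlen = 5;		/* needle length */
	ssize_t badchar[256] = { 0 };	/* zero-filled, as find_bytestr() gets from calloc() */
	ssize_t goodsfx[2 * 5];		/* shift table plus compute_sfx() scratch */
	unsigned char *hit;

	compute_badchar(badchar, needle, nlen);
	compute_goodsfx(goodsfx, needle, nlen);

	/* maxidx is the highest valid needle index, i.e. length - 1 */
	hit = bm_find(haystack, sizeof(haystack) - 1, needle, nlen - 1,
		      badchar, goodsfx);
	/* hit now points at the 'b' of "brown", or is NULL on no match */
	(void)hit;
}
#endif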
2353 /* Search for a constant byte string backwards. */
2354 static inline unsigned char*
2355 bm_find_rev(unsigned char *buf, ssize_t buflen, unsigned char *needle,
2356 ssize_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2358 buf -= maxidx;
2359 while (buflen > maxidx) {
2360 unsigned char *p;
2361 ssize_t shift, i;
2363 for (p = buf, i = maxidx; i >= 0; ++p, --i)
2364 if (needle[i] != *p)
2365 break;
2366 if (i < 0)
2367 return buf;
2369 shift = i + 1 - badchar[*p];
2370 if (shift < goodsfx[i])
2371 shift = goodsfx[i];
2373 buf -= shift;
2374 buflen -= shift;
2376 return NULL;
2379 /* Search for a constant byte string in @buf.
2380 * If @buflen is negative, search backwards, otherwise search forwards.
2381 */
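/*
 * Forward searches get @buf pointing at the first byte of the window and a
 * positive @buflen; backward searches get a negative @buflen with @buf
 * pointing at the last byte of the window, and expect the needle to have
 * been reversed beforehand (find_bytestr() does that for dir < 0) so the
 * same shift tables drive the mirrored scan.  Single-byte needles
 * (maxidx == 0) fall back to memchr()/memrchr().  In both directions a
 * match returns a pointer to its first (lowest) byte.
 */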
2382 static inline unsigned char*
2383 find_bytestr_buf(unsigned char *buf, ssize_t buflen,
2384 unsigned char *needle, ssize_t maxidx,
2385 ssize_t *badchar, ssize_t *goodsfx)
2387 if (buflen < 0) {
2388 buflen = -buflen;
2389 if (!maxidx)
2390 return memrchr(buf - buflen + 1, *needle, buflen);
2391 return bm_find_rev(buf, buflen, needle, maxidx,
2392 badchar, goodsfx);
2393 } else {
2394 if (!maxidx)
2395 return memchr(buf, *needle, buflen);
2396 return bm_find(buf, buflen, needle, maxidx,
2397 badchar, goodsfx);
2401 /* Search the file for a constant byte string using the Boyer-Moore algorithm. */
2402 static int
2403 find_bytestr(struct hed_file *file, blockoff_t *from, int dir,
2404 unsigned char *needle, ssize_t len)
2406 void *dynalloc;
2407 ssize_t *badchar, *goodsfx;
2408 unsigned char *readbuf;
2409 ssize_t slen;
2410 int ret;
2412 if (len > 1) {
2413 dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
2414 + 2*(len-1), 1);
2415 if (!dynalloc)
2416 return HED_FINDOFF_ERROR;
2417 badchar = dynalloc;
2418 goodsfx = badchar + 256;
2419 readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);
2421 if (dir < 0)
2422 reverse(needle, len);
2423 compute_badchar(badchar, needle, len);
2424 compute_goodsfx(goodsfx, needle, len);
2425 } else {
2426 dynalloc = NULL;
2427 badchar = goodsfx = NULL;
2428 readbuf = NULL;
2431 --len; /* simplify offset computation */
2432 if (dir < 0) {
2433 from->off += len;
2434 from->pos += len;
2435 slen = -len;
2436 } else
2437 slen = len;
2439 ret = HED_FINDOFF_NO_MATCH;
2440 while (from->pos >= 0) {
2441 ssize_t remain;
2443 fixup_blockoff(from);
2444 if (block_is_eof(from->block))
2445 break;
2447 remain = hed_prepare_read(file, from, SSIZE_MAX);
2448 if (!remain) {
2449 ret = HED_FINDOFF_ERROR;
2450 break;
2452 if (dir < 0)
2453 remain = -(from->off + 1);
2455 if (!hed_block_is_bad(from->block)) {
2456 unsigned char *p, *q;
2458 if ((dir >= 0 && remain > slen) ||
2459 (dir < 0 && remain < slen)) {
2460 assert(!hed_block_is_virtual(from->block));
2461 assert(from->block->dataobj);
2462 p = from->block->dataobj->data + from->off;
2463 from->off += remain;
2464 from->pos += remain;
2465 } else if (dir >= 0) {
2466 remain += slen;
2467 if (copy_in(file, readbuf, remain, from)) {
2468 ret = HED_FINDOFF_ERROR;
2469 break;
2471 p = readbuf;
2472 } else {
2473 remain += slen;
2474 from->off += remain + 1;
2475 from->pos += remain + 1;
2476 if (from->pos < 0)
2477 break;
2478 fixup_blockoff_slow(from);
2479 if (copy_in(file, readbuf, -remain, from)) {
2480 ret = HED_FINDOFF_ERROR;
2481 break;
2483 from->off -= -remain + 1;
2484 from->pos -= -remain + 1;
2485 p = readbuf + (-remain - 1);
2488 q = find_bytestr_buf(p, remain, needle, len,
2489 badchar, goodsfx);
2490 if (q) {
2491 move_rel_fast(from, q - p - remain);
2492 ret = 0;
2493 break;
2495 from->off -= slen;
2496 from->pos -= slen;
2497 } else {
2498 /* bad blocks cannot match anything */
2499 from->off += remain;
2500 from->pos += remain;
2504 if (dynalloc)
2505 free(dynalloc);
2506 return ret;
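/*
 * Expression search: an expression without HED_AEF_DYNAMIC evaluates to a
 * constant byte string, so after the first probe the work is handed over
 * to the plain Boyer-Moore search in find_bytestr(); a dynamic expression
 * (one that reads the '.' register) has to be re-evaluated and compared
 * byte by byte at every candidate offset, which is inherently slower.
 */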
2509 static int
2510 find_expr(struct hed_file *file, blockoff_t *from, int dir,
2511 struct hed_expr *expr, struct ffb_hookdata *data)
2513 int len = hed_expr_len(expr);
2514 unsigned char *buf;
2516 assert(len > 0);
2518 if (len > file_size(file))
2519 return HED_FINDOFF_NO_MATCH;
2520 if ((hed_off_t)file_size(file) - from->pos - len < 0)
2521 hed_move_relative(from,
2522 (hed_off_t)file_size(file) - from->pos - len);
2524 for (;;) {
2525 blockoff_t match;
2526 size_t remain;
2527 unsigned char *p;
2528 int pos;
2530 buf = hed_expr_eval(expr, eval_reg_cb, NULL, data);
2531 if (!buf)
2532 return HED_FINDOFF_ERROR;
2534 hed_dup_cursor(from, &match);
2535 remain = 0;
2536 for (pos = 0; pos < len; pos++) {
2537 if (!remain) {
2538 remain = hed_prepare_read(file, &match,
2539 SIZE_MAX);
2540 if (!remain ||
2541 hed_block_is_bad(match.block))
2542 break;
2543 p = block_data(match.block) + match.off;
2544 blockoff_next_block(file, &match);
2546 if (*p++ != buf[pos])
2547 break;
2548 remain--;
2550 hed_put_cursor(&match);
2552 if (pos == len)
2553 return 0;
2554 if (!remain)
2555 return HED_FINDOFF_ERROR;
2557 from->pos += dir;
2558 from->off += dir;
2559 if (0 > from->pos || from->pos > file_size(file) - len)
2560 break;
2561 fixup_blockoff(from);
2563 if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
2564 return find_bytestr(file, from, dir, buf, len);
2567 return HED_FINDOFF_NO_MATCH;
2571 hed_file_find_expr(struct hed_file *file, blockoff_t *pos, int dir,
2572 struct hed_expr *expr,
2573 hed_expr_reg_cb expr_cb, void *expr_cb_data)
2575 struct ffb_hookdata data;
2576 int res;
2578 assert(dir == 1 || dir == -1);
2580 data.file = file;
2581 data.pos = pos;
2582 data.base_ecb = expr_cb;
2583 data.base_ecb_data = expr_cb_data;
2585 hed_file_set_readahead(file,
2586 dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
2587 res = find_expr(file, pos, dir, expr, &data);
2588 hed_file_set_readahead(file, HED_RA_NONE);
2590 return res;