Fix block debugging
[hed.git] / libhed / file.c
blob71ea9f682c1f2106596bbe5ef4d8b12f1cbab66b
1 /* $Id$ */
3 /*
4 * hed - Hexadecimal editor
5 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of version 2 of the GNU General Public License as
9 * published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * There hammer on the anvil smote,
23 * There chisel clove, and graver wrote;
24 * There forged was blade, and bound was hilt;
25 * The delver mined, the mason built.
26 * There beryl, pearl, and opal pale,
27 * And metal wrought like fishes' mail,
28 * Buckler and corslet, axe and sword,
29 * And shining spears were laid in hoard.
32 /* Feature macros needed for:
33 * - memrchr
34 * - pread, pwrite
35 * - stpcpy
37 #define _GNU_SOURCE
39 #include <config.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sys/ioctl.h>
48 #include <unistd.h>
49 #include <linux/fs.h> /* BLKGETSIZE and BLKGETSIZE64 */
51 #include "file.h"
52 #include "file_priv.h"
53 #include "access.h"
54 #include "cache.h"
55 #include "swap.h"
56 #include "tree.h"
57 #include "expr.h"
59 /* memrchr() might not be available */
60 #ifndef HAVE_MEMRCHR
61 # include "memrchr.c"
62 #endif /* HAVE_MEMRCHR */
65 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
66 * there is nothing left in the upper halls. And since the dwarves fled, no one
67 * dares to seek the shafts and treasuries down in the deep places: they are
68 * drowned in water--or in a shadow of fear.'
71 /* TODO: Currently the file blocks allocation is not very sophisticated and
72 * when the weather is bad it could probably have rather horrible results. */
74 #undef BLOCKS_DEBUG
75 #ifdef BLOCKS_DEBUG
76 #define BDEBUG(x...) fprintf(stderr, x)
77 #else
78 #define BDEBUG(x...)
79 #endif
81 /* Number of blocks in cache */
82 #define CACHE_LENGTH 64
84 /* Blocks for readahead */
85 #define FILE_READAHEAD (CACHE_LENGTH/2)
87 #define first_block(f) next_block(last_block(f))
88 #define prev_block(b) (tree_entry(prev_in_tree(&(b)->t),struct hed_block,t))
89 #define next_block(b) (tree_entry(next_in_tree(&(b)->t),struct hed_block,t))
90 #define last_block(f) (&(f)->eof_block)
92 #define block_offset(b) tree_block_offset(&(b)->t)
94 #define recalc_block_recursive(b) recalc_node_recursive(&(b)->t)
96 #define chain_block_before(tree,b,p) insert_into_tree((tree), &(b)->t, &(p)->t)
97 #define recalc_chain_block_before(tree,b,p) do { \
98 chain_block_before((tree), (b), (p)); \
99 recalc_block_recursive((b)); \
100 } while (0)
102 #define unchain_block(tree,b) del_from_tree((tree), &(b)->t)
103 #define recalc_unchain_block(tree,b) recalc_del_from_tree((tree), &(b)->t)
105 #define init_block_list(tree,b) init_tree(tree, &(b)->t)
106 #define init_block_link(b) init_node(&(b)->t)
108 #define find_block(tree,o) tree_entry(find_in_tree((tree),(o)),struct hed_block,t)
#ifdef HED_CONFIG_SWAP

/* Return the swp file object backing @file's allocations. */
static inline struct swp_file *
file_swp(struct file_priv *file)
{
	return file->swp;
}

#else

/* Provide a stub for the non-swap case: there is no swap object. */
static inline void *
file_swp(struct file_priv *file)
{
	return NULL;
}

#endif

#ifdef HED_CONFIG_READAHEAD

/* Readahead mode predicates; see enum hed_readahead. */
#define file_ra_none(f)		((f)->readahead == HED_RA_NONE)
#define file_ra_forward(f)	((f)->readahead == HED_RA_FORWARD)
#define file_ra_backward(f)	((f)->readahead == HED_RA_BACKWARD)

/* Record the requested readahead mode on @file. */
static inline void
set_readahead(struct file_priv *file, int val)
{
	file->readahead = val;
}

#else

/* Without readahead support, behave as if readahead were disabled. */
#define file_ra_none(f)		(1)
#define file_ra_forward(f)	(0)
#define file_ra_backward(f)	(0)
#define set_readahead(file,t)	do {} while(0)

#endif /* HED_CONFIG_READAHEAD */

/* Public API: set the readahead policy for @file. */
void
hed_file_set_readahead(struct hed_file *file, enum hed_readahead val)
{
	set_readahead(file_private(file), val);
}
158 /* Get the physical offset of the byte immediately following @block. */
159 static inline hed_uoff_t
160 phys_end(const struct hed_block *block)
162 return hed_block_is_inserted(block)
163 ? block->phys_pos
164 : block->phys_pos + hed_block_size(block);
167 static struct hed_block *
168 next_nonzero_block(struct hed_block *block)
170 while (!hed_block_is_eof(block)) {
171 block = next_block(block);
172 if (hed_block_size(block))
173 return block;
175 return NULL;
178 static struct hed_block *
179 prev_nonzero_block(struct hed_block *block)
181 do {
182 block = prev_block(block);
183 if (hed_block_is_eof(block))
184 return NULL;
185 } while (!hed_block_size(block));
186 return block;
189 bool
190 hed_block_is_after_erase(struct hed_block *block)
192 struct hed_block *prev = prev_nonzero_block(block);
193 return prev
194 ? block->phys_pos > phys_end(prev)
195 : !!block->phys_pos;
198 bool
199 hed_block_is_after_insert(struct hed_block *block)
201 struct hed_block *prev = prev_nonzero_block(block);
202 return prev && hed_block_is_inserted(prev);
/* Get the blocks tree of @file. */
static inline struct hed_tree *
hed_file_blocks(struct file_priv *file)
{
	return &file->blocks;
}
212 #ifndef BLOCKS_DEBUG
213 # define dump_blocks(file) {}
214 #else
216 static hed_uoff_t
217 block_phys_size(struct hed_file *file, struct hed_block *block)
219 struct hed_block *next;
221 if (hed_block_is_eof(block))
222 return 0;
223 next = next_block(block);
224 return next->phys_pos - block->phys_pos;
/* Recursively dump one tree node (in-order): depth marker, logical and
 * physical offsets, flag letters (v=virtual, i=inserted, *=dirty),
 * sizes, the first four data bytes, node pointers, and all cursors
 * referencing the block.  Also asserts the tree invariants
 * (physical position continuity and cover_size consistency).
 * NOTE(review): this takes struct hed_file * while its only caller,
 * dump_blocks(), passes a struct file_priv * -- confirm which type is
 * intended. */
static void
dump_block(int level, struct hed_file *file, struct hed_tree_node *node,
	   hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
{
	struct hed_block *block = tree_entry(node, struct hed_block, t);
	bool virtual = hed_block_is_virtual(block);
	unsigned char *p;
	hed_cursor_t *cur;
	char t[21] = "                    ";

	if (node->left)
		dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
	p = hed_block_data(block);
	/* Depth marker: '>' at column @level, capped at 20 with '.'. */
	if (level < 20) t[level] = '>'; else t[19] = '.';
	fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx"
		" {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
		t,
		(unsigned long long) *cur_offset,
		(unsigned long long) *cur_poffset,
		virtual ? 'v' : ' ',
		hed_block_is_inserted(block) ? 'i' : ' ',
		hed_block_is_dirty(block) ? '*' : ' ',
		(unsigned long long) node->size,
		(unsigned long long) block_phys_size(file, block),
		p && block->t.size > 0 ? p[0] : 0,
		p && block->t.size > 1 ? p[1] : 0,
		p && block->t.size > 2 ? p[2] : 0,
		p && block->t.size > 3 ? p[3] : 0,
		node, node->up,
		(unsigned long long) node->cover_size
		);
	/* Dump every cursor currently referencing this block. */
	list_for_each_entry (cur, &block->refs, list) {
		fprintf(stderr, "  <%p>: %llx->%p:%llx\n",
			cur, (long long)cur->pos,
			cur->block, (unsigned long long)cur->off);
	}
	assert(*cur_poffset == block->phys_pos);
	*cur_offset += node->size;
	*cur_poffset += block_phys_size(file, block);
	if (node->right)
		dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
	assert(node->cover_size == (node->left ? node->left->cover_size : 0)
	       + (node->right ? node->right->cover_size : 0)
	       + node->size);
}
/* Walk the tree manually here, because foreach_block() does not provide
 * the tree structure.
 * TODO: Change this if you plan to debug any other block containers.
 */
static void
dump_blocks(struct file_priv *file)
{
	struct hed_block *first = first_block(file);
	hed_uoff_t cur_offset, cur_poffset;

	fprintf(stderr, "-- blocks dump --\n");
	cur_offset = 0;
	cur_poffset = first->phys_pos;
	dump_block(0, file, hed_file_blocks(file)->root,
		   &cur_offset, &cur_poffset);
	fprintf(stderr, "-- blocks dump end --\n");
}

#endif
/* Initialize @curs to point at logical @offset in @file: find the
 * covering block, compute the in-block offset, and register the
 * cursor on the block's reference list (so block reshuffling can
 * keep it up to date). */
static void
get_cursor(struct file_priv *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	struct hed_block *block;

	block = find_block(hed_file_blocks(file), offset);
	assert(block != NULL);
	curs->pos = offset;
	curs->block = block;
	curs->off = offset - block_offset(block);
	list_add(&curs->list, &block->refs);

	BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
	       offset, offset - curs->off, curs->off, block->t.size);
}

/* Public wrapper for get_cursor(). */
void
hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	get_cursor(file_private(file), offset, curs);
}
/* Release @curs: unlink it from its block's reference list. */
static inline void
put_cursor(hed_cursor_t *curs)
{
	list_del(&curs->list);
}

/* Public wrapper for put_cursor(). */
void
hed_put_cursor(hed_cursor_t *curs)
{
	put_cursor(curs);
}

/* Re-point an already-initialized cursor at a new @offset. */
void
hed_update_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	put_cursor(curs);
	get_cursor(file_private(file), offset, curs);
}
/* Copy @src into @dst; @dst must be uninitialized.  The duplicate is
 * registered on the same block's reference list. */
void
hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	dst->pos = src->pos;
	dst->block = src->block;
	dst->off = src->off;
	list_add_tail(&dst->list, &src->block->refs);
}

/* Like hed_dup_cursor(), but @dst may already be a valid cursor,
 * in which case it is released first. */
void
hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	if (hed_is_a_cursor(dst))
		put_cursor(dst);
	hed_dup_cursor(src, dst);
}
/* Move cursors from @old to @new, adding @off to their block
 * offsets to keep them at the same position.  The cursors stay on
 * @old's reference list (callers splice the lists separately when
 * needed). */
static void
update_cursors(const struct hed_block *old, struct hed_block *new,
	       hed_off_t off)
{
	hed_cursor_t *curs;

	BDEBUG("Updating cursors from <%p> to <%p>%c%llx\n",
	       old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry(curs, &old->refs, list) {
		curs->block = new;
		curs->off += off;
	}
}
/* Move cursors in the range <@start;@end> from @old to @new,
 * adding @off to their block offset, plus moving the reference list.
 * Uses the _safe iterator because list_move() unlinks the current
 * entry. */
static void
move_cursors(const struct hed_block *old, struct hed_block *new,
	     hed_uoff_t start, hed_uoff_t end, hed_off_t off)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>%c%llx\n",
	       old, start, end, new,
	       off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry_safe(curs, nextcurs, &old->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->block = new;
			curs->off += off;
			list_move(&curs->list, &new->refs);
		}
}
/* Move cursors in the range @block:<@start;@end> to @newpos.
 * Unlike move_cursors(), this re-targets the cursors to an absolute
 * position (pos, block and off are all taken from @newpos). */
static void
move_cursors_abs(const struct hed_block *block,
		 hed_uoff_t start, hed_uoff_t end,
		 const hed_cursor_t *newpos)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>:%llx\n",
	       block, start, end, newpos->block, newpos->off);

	list_for_each_entry_safe(curs, nextcurs, &block->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->pos = newpos->pos;
			curs->block = newpos->block;
			curs->off = newpos->off;
			list_move(&curs->list, &newpos->block->refs);
		}
}
/* Update the positions of cursors at and after @start for all
 * blocks starting at @block: add @off to every cursor whose logical
 * position is >= @start, walking forward up to and including the
 * EOF block. */
static void
slide_cursors(const struct hed_block *block, hed_uoff_t start, hed_off_t off)
{
	hed_cursor_t *curs;
	const struct hed_block *nblock;

	BDEBUG("Sliding cursors >= %llx by %c%llx, starting at <%p>\n",
	       start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
	nblock = block;
	do {
		block = nblock;
		list_for_each_entry(curs, &block->refs, list)
			if (curs->pos >= start)
				curs->pos += off;
		nblock = next_block(block);
	} while (!hed_block_is_eof(block));
}
/* Allocate a zero-initialized block from the swap allocator and set
 * up its links.  Ex-cache blocks get a self-contained LRU head;
 * cache-backed blocks are appended to the file's LRU list.
 * Returns NULL on allocation failure. */
static struct hed_block *
new_block(struct file_priv *file, long flags)
{
	struct hed_block *new;

	if (! (new = swp_zalloc(file_swp(file), sizeof(struct hed_block))) )
		return NULL;

	new->flags = flags;
	init_block_link(new);
	INIT_LIST_HEAD(&new->refs);
	if (flags & HED_BLOCK_EXCACHE)
		INIT_LIST_HEAD(&new->lru);
	else
		list_add_tail(&new->lru, &file->lru);

	return new;
}
/* Create a virtual (data-less, ex-cache) block covering @size bytes
 * at physical position @pos.  @extraflags is OR-ed into the block
 * flags (e.g. HED_BLOCK_BAD).  Returns NULL on allocation failure. */
static struct hed_block *
new_virt_block(struct file_priv *file, hed_uoff_t pos, hed_uoff_t size,
	       long extraflags)
{
	struct hed_block *new =
		new_block(file, (HED_BLOCK_EXCACHE |
				 HED_BLOCK_VIRTUAL |
				 extraflags));
	if (!new)
		return NULL;

	new->t.size = size;
	new->phys_pos = pos;
	BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
	return new;
}

/* Create a data block covering @size bytes at physical position @pos,
 * backed by @dataobj (whose refcount is bumped).  The block's data
 * offset is the position within the cache page.
 * Returns NULL on allocation failure. */
static struct hed_block *
new_data_block(struct file_priv *file, hed_uoff_t pos, hed_uoff_t size,
	       struct hed_block_data *dataobj)
{
	struct hed_block *new =
		new_block(file, 0);
	if (!new)
		return NULL;

	cache_get(dataobj);
	new->dataobj = dataobj;
	new->t.size = size;
	new->phys_pos = pos;
	new->dataoff = FILE_BLOCK_OFF(pos);
	BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
	return new;
}
/* Free @block: drop its cache data reference (if any), unlink it
 * from the LRU list and return its memory to the swap allocator.
 * The block must already be unchained from the blocks tree. */
static void
file_free_block(struct file_priv *file, struct hed_block *block)
{
	if (block->dataobj)
		cache_put(file->cache, block->dataobj);
	list_del(&block->lru);

	swp_free(file_swp(file), block);
}

/* Remove and free @block if it is empty and unreferenced.
 * Returns true iff the block was killed.  The EOF block is never
 * killed. */
static bool
kill_block_if_empty(struct file_priv *file, struct hed_block *block)
{
	if (!hed_block_is_eof(block) && block->t.size == 0 &&
	    list_empty(&block->refs)) {
		/* No recalculation needed, zero size. */
		unchain_block(hed_file_blocks(file), block);
		file_free_block(file, block);
		return true;
	}
	return false;
}
/* This may kill the previous block as well, if it can be merged
 * with the next one. It will never kill anything which _follows_.
 *
 * Kill @block by merging it into an adjacent inner-virtual block
 * (preferring the next one), or by converting it to a virtual block
 * in place when no merge is possible.  Cursors referencing @block
 * are re-pointed at the surviving block. */
static void
file_kill_block(struct file_priv *file, struct hed_block *block)
{
	hed_uoff_t phys_pos = block->phys_pos;
	struct hed_block *prev = prev_block(block);
	struct hed_block *next = next_block(block);
	struct hed_block *merger;
	bool killprev = false;

	/* We should never kill a dirty block! */
	assert(!hed_block_is_dirty(block));
	/* We shouldn't get with an empty block here (that might
	 * need special considerations with virtualization). */
	assert(block->t.size > 0);

	if (!hed_block_is_eof(block) &&
	    hed_block_is_inner_virtual(next) &&
	    phys_pos + block->t.size == next->phys_pos) {
		/* Merge into the following virtual block; if the
		 * preceding block is also mergeable, kill it too
		 * (recursively, at the end). */
		if (!hed_block_is_eof(prev) &&
		    hed_block_is_inner_virtual(prev) &&
		    prev->phys_pos + prev->t.size == phys_pos)
			killprev = true;
		merger = next;
		merger->phys_pos -= block->t.size;
		/* The merger grows at its front: shift its own cursors
		 * forward, then absorb @block's cursors unchanged. */
		update_cursors(merger, merger, block->t.size);
		update_cursors(block, merger, 0);
	} else if (!hed_block_is_eof(prev) &&
		   hed_block_is_inner_virtual(prev) &&
		   prev->phys_pos + prev->t.size == phys_pos) {
		/* Merge into the preceding virtual block. */
		merger = prev;
		update_cursors(block, merger, merger->t.size);
	} else if (!hed_block_is_virtual(block)) {
		/* Convert physical to virtual */
		assert(block->dataobj);
		cache_put(file->cache, block->dataobj);
		block->dataobj = NULL;

		list_del_init(&block->lru); /* unlink the block from LRU */
		hed_block_set_excache(block); /* say it's unlinked */
		hed_block_set_virtual(block);
		return;
	} else
		/* Already virtual and cannot merge */
		return;

	list_splice(&block->refs, &merger->refs);

	/* Messing with block sizes and unchaining is a bit tricky
	 * since unchain_block() can splay(). So we really need
	 * to recalc_block_recursive() right after we update the size.
	 * If this place turns out to be a hot-spot, we can optimize
	 * the tree operations here. */
	merger->t.size += block->t.size;
	recalc_block_recursive(merger);

	/* Destroy the block */
	recalc_unchain_block(hed_file_blocks(file), block);
	file_free_block(file, block);

	if (killprev)
		file_kill_block(file, prev);
}
/* Split @block at @splitpoint: a new head block takes the first
 * @splitpoint bytes (sharing @block's data object, refcount bumped)
 * and is chained before @block, which keeps the tail.  Cursors in
 * the head range move to the head; the rest are rebased.
 * Returns the new head block, or NULL on allocation failure. */
static struct hed_block *
split_block(struct file_priv *file, struct hed_block *block,
	    hed_uoff_t splitpoint)
{
	struct hed_block *head;

	head = new_block(file, block->flags & ~HED_BLOCK_EOF);
	if (!head)
		return NULL;

	if ( (head->dataobj = block->dataobj) ) {
		cache_get(head->dataobj);
		head->dataoff = block->dataoff;
		block->dataoff += splitpoint;
	} else
		assert(hed_block_is_virtual(block));

	head->t.size = splitpoint;
	head->phys_pos = block->phys_pos;

	block->t.size -= splitpoint;
	/* Inserted blocks all share one physical position, so only
	 * non-inserted tails advance physically. */
	if (!hed_block_is_inserted(block))
		block->phys_pos += splitpoint;
	recalc_block_recursive(block);
	recalc_chain_block_before(hed_file_blocks(file), head, block);

	move_cursors(block, head, 0, splitpoint - 1, 0);
	update_cursors(block, block, -splitpoint);

	return head;
}
/* Replace a chunk in @block with @newblock.
 * The chunk spans newblock->t.size bytes starting at @offset inside
 * @block.  @block is shortened to the tail; a head block is created
 * when @offset > 0.  Returns 0 on success, -1 on allocation failure.
 * NOTE(review): unlike split_block(), the head shares block->dataobj
 * without a cache_get() -- confirm the refcounting is intentional. */
static int
replace_chunk(struct file_priv *file, struct hed_block *block,
	      hed_uoff_t offset, struct hed_block *newblock)
{
	size_t len = newblock->t.size;
	hed_uoff_t leadlen = offset + len;

	assert(offset < block->t.size);
	assert(len <= block->t.size - offset);

	/* Re-create the head block if necessary */
	if (offset) {
		struct hed_block *head;

		head = new_block(file, block->flags & ~HED_BLOCK_EOF);
		if (!head)
			return -1;
		head->t.size = offset;
		head->dataobj = block->dataobj;
		head->dataoff = block->dataoff;
		head->phys_pos = block->phys_pos;

		recalc_chain_block_before(hed_file_blocks(file),
					  head, block);

		/* Move cursors to the head */
		move_cursors(block, head, 0, offset - 1, 0);
	}

	/* Move pointers to the new block */
	assert(leadlen > 0);
	move_cursors(block, newblock, offset, leadlen - 1, -offset);

	/* Shorten the tail block */
	block->t.size -= leadlen;
	block->dataoff += leadlen;
	assert(!hed_block_is_inserted(block));
	block->phys_pos += leadlen;
	recalc_block_recursive(block);
	update_cursors(block, block, -leadlen);

	/* Insert the new block */
	recalc_chain_block_before(hed_file_blocks(file), newblock, block);

	/* Kill the leading block if possible */
	kill_block_if_empty(file, block);

	return 0;
}
652 #ifdef HED_CONFIG_SWAP
/* Build the swap-file name for @filename: the basename gets a '.'
 * prefix and a ".hedswp" suffix, in the same directory.
 * E.g. "dir/file" -> "dir/.file.hedswp".
 * Returns a malloc'ed string owned by the caller, or NULL on
 * allocation failure. */
static char *
swp_filename(const char *filename)
{
	size_t fnlen = strlen(filename);
	const char *slash = strrchr(filename, '/');
	size_t dirlen = slash ? (size_t)(slash - filename) + 1 : 0;
	char *swp;

	/* dir + '.' + basename + ".hedswp" + NUL == fnlen + 9 bytes */
	if (! (swp = malloc(fnlen + 9)) )
		return NULL;
	snprintf(swp, fnlen + 9, "%.*s.%s.hedswp",
		 (int)dirlen, filename, filename + dirlen);
	return swp;
}
/* Find an unused swap file name, starting from @swpname.  While the
 * candidate exists on disk, the last character is decremented
 * ('p' -> 'o' -> ... ); the search gives up when it would go below
 * 'a'.  Returns @swpname itself if it is free, a freshly strdup'ed
 * variant otherwise, or NULL on failure (allocation or exhaustion). */
static char *
newswp_filename(char *swpname)
{
	char *ret, *p;

	ret = swpname;
	while (!access(ret, F_OK)) {
		/* First collision: work on a private copy. */
		if (ret == swpname) {
			if (! (ret = strdup(swpname)) )
				return NULL;
			p = ret + strlen(ret) - 1;
		}

		if (*p == 'a') {
			free(ret);
			return NULL;
		}
		--*p;
	}
	return ret;
}
/* Discard the old swap file and move the new one into its place.
 * On success both name pointers alias the same string.
 * Returns 0 on success, -1 on failure (errno set by remove/rename). */
int
hed_file_remove_swap(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	if (remove(file->swpname))
		return -1;
	if (rename(file->newswpname, file->swpname))
		return -1;

	free(file->newswpname);
	file->newswpname = file->swpname;
	return 0;
}
/* Set up the swap file for @name and allocate the file_priv object
 * inside it.  The private structure lives in the swap file's private
 * area, so it needs no separate allocation.
 * Returns the zeroed file_priv, or NULL on failure. */
static inline struct file_priv *
file_swp_init(const char *name)
{
	char *swpname, *newswpname;
	struct swp_file *swp;
	struct file_priv *file;

	swpname = swp_filename(name);
	if (!swpname)
		goto fail;
	newswpname = newswp_filename(swpname);
	if (!newswpname)
		goto fail_free_name;
	swp = swp_init_write(newswpname);
	if (!swp)
		goto fail_free_newname;

	/* The private block must fit into one swap block. */
	assert(sizeof(struct swp_header) + sizeof(struct file_priv)
	       <= FILE_BLOCK_SIZE);
	file = swp_private(swp);
	memset(file, 0, sizeof *file);

	file->swp = swp;
	file->swpname = swpname;
	file->newswpname = newswpname;

	return file;

fail_free_newname:
	free(newswpname);
fail_free_name:
	/* newswp_filename() may return its argument unchanged. */
	if (swpname != newswpname)
		free(swpname);
fail:
	return NULL;
}
/* Tear down the swap file and release both name strings. */
static inline void
file_swp_done(struct file_priv *file)
{
	remove(file->newswpname);
	if (file->newswpname != file->swpname)
		free(file->newswpname);
	free(file->swpname);
	swp_done(file_swp(file));
	/* file was de-allocated automatically with file->swp */
}

/* Tell whether @f still uses a distinct (unmerged) swap file. */
int
hed_file_has_swap(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	return file->swpname != file->newswpname;
}

/* Return the swap file name of @f (owned by the library). */
char *
hed_file_swap_name(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	return file->swpname;
}
#else /* HED_CONFIG_SWAP */

/* Without swap support, the private object lives on the heap. */
static inline struct file_priv *
file_swp_init(const char *name)
{
	return calloc(1, sizeof(struct file_priv));
}

static inline void
file_swp_done(struct file_priv *file)
{
	free(file);
}

/* There never is a swap file in this configuration. */
int
hed_file_has_swap(struct hed_file *file)
{
	return 0;
}

int
hed_file_remove_swap(struct hed_file *file)
{
	return 0;
}

char *
hed_file_swap_name(struct hed_file *file)
{
	return NULL;
}

#endif /* HED_CONFIG_SWAP */
/* Return the cached stat buffer of @file. */
static inline struct stat *
file_stat(struct file_priv *file)
{
	return &file->s;
}
/* Refresh the physical size of @f from the underlying object and
 * adjust the logical size by the same delta.  Block devices are
 * sized with BLKGETSIZE64 (falling back to BLKGETSIZE, which counts
 * 512-byte sectors); character devices are treated as covering the
 * whole offset space.  Returns 0 on success, -1 on error (errno set). */
int
hed_file_update_size(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	hed_uoff_t oldsize = file->f.phys_size;

	if(fstat(file->fd, file_stat(file)) < 0)
		return -1;

	if (S_ISBLK(file_stat(file)->st_mode)) {
		if (ioctl(file->fd, BLKGETSIZE64, &file->f.phys_size)) {
			unsigned long size_in_blocks;
			if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
				return -1;
			/* BLKGETSIZE reports 512-byte sectors. */
			file->f.phys_size = (hed_uoff_t)size_in_blocks << 9;
		}
	} else if (S_ISREG(file_stat(file)->st_mode)) {
		file->f.phys_size = file_stat(file)->st_size;
	} else if (S_ISCHR(file_stat(file)->st_mode)) {
		/* Character devices must at least be seekable. */
		if (lseek(file->fd, 0, SEEK_SET) < 0)
			return -1;
		file->f.phys_size = (hed_uoff_t)OFF_MAX + 1;
	} else {
		errno = EINVAL;
		return -1;
	}

	file->f.size += file->f.phys_size - oldsize;
	return 0;
}
/* Open the backing file read-only and determine its size.  A missing
 * file is not an error (editing starts from an empty buffer); any
 * other failure is reported as a positive errno value. */
static int
do_file_open(struct file_priv *file)
{
	file->fd = open(file->f.name, O_RDONLY);
	if (file->fd < 0) {
		if (errno != ENOENT)
			return errno;
		fprintf(stderr, "Warning: File %s does not exist\n",
			file->f.name);
		memset(file_stat(file), 0, sizeof(struct stat));
	} else if (hed_file_update_size(&file->f)) {
		/* Preserve errno across close(). */
		int errsv = errno;
		close(file->fd);
		return errsv;
	}
	return 0;
}
/* Initialize the blocks tree: the embedded EOF block covers the
 * offset space past the physical end, and one virtual block covers
 * the whole existing file content (if any).
 * Returns 0 on success, -1 on allocation failure. */
static int
file_setup_blocks(struct file_priv *file)
{
	hed_uoff_t phys_size = file->f.phys_size;
	struct hed_block *block;

	block = &file->eof_block;
	block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL | HED_BLOCK_EOF;
	INIT_LIST_HEAD(&block->lru);
	INIT_LIST_HEAD(&block->refs);
	block->t.size = OFF_MAX - phys_size + 1;
	block->phys_pos = phys_size;

	init_block_list(hed_file_blocks(file), block);

	if (phys_size) {
		block = new_virt_block(file, 0, phys_size, 0);
		if (!block)
			return -1;
		recalc_chain_block_before(hed_file_blocks(file), block,
					  &file->eof_block);
	}

	return 0;
}
/* Library-wide initialization; must be called before hed_open(). */
int
libhed_init(void)
{
	return file_access_init();
}
/* Open @name for editing: set up the swap/private object, the block
 * cache, the file descriptor and the initial blocks tree.
 * Returns the public file handle, or NULL on failure (partially
 * constructed state is torn down via hed_close()). */
struct hed_file *
hed_open(const char *name)
{
	struct file_priv *file;

	if (! (file = file_swp_init(name)) )
		goto fail;

	file->f.name = name;

	file->cache = cache_init(CACHE_LENGTH, file_swp(file));
	if (!file->cache)
		goto fail_file;
	INIT_LIST_HEAD(&file->lru);

	if (do_file_open(file))
		goto fail_file;

	if (file_setup_blocks(file))
		goto fail_file;

	fixup_register(file);

	return &file->f;

fail_file:
	hed_close(&file->f);
fail:
	return NULL;
}
/* Close @f and release everything it owns.  Also used to unwind a
 * partially constructed file from hed_open(), hence the NULL checks. */
void
hed_close(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	assert(file);

	/* Do not check for errors:
	 * 1. This FD is read-only => no data loss possible
	 * 2. We're about to exit anyway => no resource leak
	 */
	if (file->fd >= 0)
		close(file->fd);

	fixup_deregister(file);

	/* No need to free file blocks here, because all data is
	 * allocated either from the cache or from the swap file
	 * and both is going to be destroyed now.
	 */
	if (file->cache)
		cache_done(file->cache);

	file_swp_done(file);
}
/* Adjust a cursor after off gets outside its block:
 * walk backwards while the (signed) offset is negative, forwards
 * otherwise, until the offset falls inside a block, then re-register
 * the cursor on that block's reference list. */
static void
fixup_cursor_slow(hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	hed_uoff_t off = curs->off;

	do {
		/* A "negative" unsigned offset means we moved before
		 * the current block. */
		if ((hed_off_t)off < 0) {
			block = prev_block(block);
			off += block->t.size;
		} else {
			off -= block->t.size;
			block = next_block(block);
		}
	} while (off >= block->t.size);

	curs->block = block;
	curs->off = off;
	list_move(&curs->list, &block->refs);
}
/* Adjust a cursor if off gets outside its block.
 * This is separate from fixup_cursor_slow, because it is supposed
 * to be small enough to be inlined (which is a win, because most of
 * the time no fixup has to be done, so the fast inlined path is used).
 */
static inline void
fixup_cursor(hed_cursor_t *curs)
{
	if (curs->off >= curs->block->t.size)
		fixup_cursor_slow(curs);
}
/* Move @curs by @num bytes, clamping at offset 0 and OFF_MAX.
 * Returns the distance actually moved (may be smaller in magnitude
 * than @num when clamped). */
hed_off_t
hed_move_relative(hed_cursor_t *curs, hed_off_t num)
{
	hed_off_t newpos = curs->pos + num;
	if (newpos < 0) {
		/* Signed overflow/underflow: clamp to the boundary. */
		newpos = num < 0 ? 0 : OFF_MAX;
		num = newpos - curs->pos;
	}
	curs->pos = newpos;
	curs->off += num;
	fixup_cursor(curs);
	return num;
}
/* Relative move with no checking (and only by a small amount) */
static inline void
move_rel_fast(hed_cursor_t *curs, ssize_t num)
{
	curs->off += num;
	curs->pos += num;
	fixup_cursor(curs);
}
/* Allocate @n cache pages into @caches, evicting the least recently
 * used blocks until enough free slots exist, then compact the cache
 * mapping so the @n pages are virtually contiguous (required by
 * file_load_data(), which reads across page boundaries). */
static void
alloc_caches(struct file_priv *file, struct hed_block_data **caches, int n)
{
	struct remap_control rc;
	int i;

	BDEBUG("Allocate %d caches (%d free slots available)\n",
	       n, file->cache->nfree);

	assert(n <= CACHE_LENGTH);
	while (file->cache->nfree < n) {
		struct hed_block *block;

		assert(!list_empty(&file->lru));
		block = list_entry(file->lru.next, struct hed_block, lru);
		BDEBUG("Killing block at physical %llx\n", block->phys_pos);
		file_kill_block(file, block);
	}

	for (i = 0; i < n; ++i) {
		caches[i] = cache_alloc(file->cache);
		assert(caches[i]);
	}

	remap_init(&rc);
	remap_compact(&rc, file->cache, caches, n);
	for (i = 0; i < n; ++i)
		remap_add(&rc, caches[i]->data);
	remap_finish(&rc);
}

/* Drop one reference on each non-NULL entry of @preload. */
static inline void
free_caches(struct file_priv *file, struct hed_block_data **preload, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		if (preload[i])
			cache_put(file->cache, preload[i]);
}
/* Read @n file blocks starting at physical @offset into the @caches
 * pages (assumed virtually contiguous after alloc_caches()).
 * A short read at EOF zero-fills the remainder; a read error drops
 * the affected cache entry (set to NULL) and skips one block.
 * Always returns 0. */
static int
file_load_data(struct file_priv *file,
	       struct hed_block_data **caches, int n,
	       hed_uoff_t offset)
{
	struct hed_block_data *dataobj = caches[0];
	void *data = dataobj->data;
	ssize_t rsize, total, segsize;

	segsize = n << FILE_BLOCK_SHIFT;
	for (total = 0; total < segsize; total += rsize) {
		rsize = pread(file->fd, data + total,
			      segsize - total, offset + total);
		if (!rsize) {
			/* EOF: zero the rest of the segment. */
			memset(data + total, 0, segsize - total);
			break;
		}
		if (rsize < 0) {
			/* Read error: give up on this one block and
			 * continue with the next.
			 * NOTE(review): reassigning data from the
			 * dropped object looks suspicious -- verify
			 * against the contiguity set up by
			 * alloc_caches(). */
			dataobj = caches[total >> FILE_BLOCK_SHIFT];
			caches[total >> FILE_BLOCK_SHIFT] = NULL;
			data = dataobj->data;
			cache_put(file->cache, dataobj);
			total = FILE_BLOCK_ROUND(total);
			rsize = FILE_BLOCK_SIZE;
			BDEBUG("Error reading block at phys %llx: %s\n",
			       offset + total, strerror(errno));
		}
	}

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}
#ifdef HED_CONFIG_MMAP

/* mmap-based variant of file_load_data(): map the segment privately
 * and point the cache pages into the mapping.  If the file mapping
 * fails, fall back to an anonymous mapping filled by the read path. */
static int
file_load_data_mmap(struct file_priv *file,
		    struct hed_block_data **caches, int n,
		    hed_uoff_t offset)
{
	void *data;
	ssize_t segsize;
	int i;

	segsize = n << FILE_BLOCK_SHIFT;
	data = mmap(NULL, segsize,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
		    file->fd, offset);

	if (data == MAP_FAILED) {
		BDEBUG("mmap failed at %llx: fail over to traditional read\n",
		       offset);

		/* NOTE(review): POSIX wants fd == -1 for anonymous
		 * mappings; passing 0 works on Linux but is not
		 * portable -- confirm the intended platforms. */
		data = mmap(NULL, segsize,
			    PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS,
			    0, 0);
		if (data == MAP_FAILED)
			return -1;

		for (i = 0; i < n; ++i)
			caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
		return file_load_data(file, caches, n, offset);
	}

	for (i = 0; i < n; ++i)
		caches[i]->data = data + (i << FILE_BLOCK_SHIFT);

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}

# define file_load_data file_load_data_mmap

#endif
/* Find the block with the lowest physical position that intersects
 * the loaded segment. The search starts at @block.
 * (@tree is currently unused -- presumably kept for a future lookup
 * strategy; confirm before removing.) */
static struct hed_block *
first_load_block(struct hed_tree *tree, struct hed_block *block,
		 hed_uoff_t segstart)
{
	struct hed_block *prev = block;
	do {
		block = prev;
		prev = prev_block(prev);
	} while (!hed_block_is_eof(prev) && phys_end(prev) > segstart);
	return block;
}
/* Load the file data around cursor @from into the cache, honouring
 * the configured readahead direction, and replace the covered
 * virtual blocks with data blocks (or BAD virtual blocks where the
 * read failed).  Returns 0 on success, -1 on failure. */
static int
load_blocks(struct file_priv *file, const hed_cursor_t *from)
{
	hed_uoff_t physpos, segstart;
	struct hed_block_data *preload[FILE_READAHEAD];
	size_t ra_bkw, ra_fwd, ra_off;
	hed_cursor_t pos;
	int nblocks;

	/* Compute the segment: block-align the start, then extend in
	 * the readahead direction and clamp to the file boundaries. */
	segstart = hed_cursor_phys_pos(from);
	ra_bkw = FILE_BLOCK_OFF(segstart);
	ra_fwd = FILE_BLOCK_SIZE - ra_bkw;

	if (file_ra_forward(file))
		ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
	else if (file_ra_backward(file))
		ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;

	if (ra_bkw > segstart)
		ra_bkw = segstart;
	if (ra_fwd > file->f.phys_size - segstart)
		ra_fwd = file->f.phys_size - segstart;

	segstart -= ra_bkw;
	ra_fwd += ra_bkw;
	pos.block = first_load_block(hed_file_blocks(file),
				     from->block, segstart);
	pos.off = segstart >= pos.block->phys_pos
		? segstart - pos.block->phys_pos
		: 0;

	/* Keep a reference while alloc_caches() may evict blocks. */
	list_add(&pos.list, &pos.block->refs);
	nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
	alloc_caches(file, preload, nblocks);
	put_cursor(&pos);

	if (file_load_data(file, preload, nblocks, segstart)) {
		free_caches(file, preload, nblocks);
		return -1;
	}

	/* Walk the segment, replacing virtual chunks with the newly
	 * loaded data (or BAD markers where the page is missing). */
	while (physpos = hed_cursor_phys_pos(&pos),
	       ra_off = physpos - segstart,
	       ra_off < ra_fwd) {
		struct hed_block_data *dataobj;
		struct hed_block *newblock;
		size_t datalen;

		if (!hed_block_is_virtual(pos.block)) {
			pos.block = next_block(pos.block);
			pos.off = 0;
			continue;
		}

		datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
		if (datalen > hed_block_size(pos.block) - pos.off)
			datalen = hed_block_size(pos.block) - pos.off;

		dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
		/* NOTE(review): newblock is dereferenced below without
		 * a NULL check; new_data_block()/new_virt_block() can
		 * fail -- confirm allocation failure is impossible
		 * here. */
		newblock = dataobj
			? new_data_block(file, physpos, datalen, dataobj)
			: new_virt_block(file, physpos, datalen,
					 HED_BLOCK_BAD);

		/* Punch the new block */
		BDEBUG("Add %s block at %llx, length %llx\n",
		       hed_block_is_virtual(newblock) ? "error" : "physical",
		       newblock->phys_pos, newblock->t.size);
		if (replace_chunk(file, pos.block, pos.off, newblock)) {
			file_free_block(file, newblock);
			free_caches(file, preload, nblocks);
			return -1;
		}

		pos.block = next_block(newblock);
		pos.off = 0;
	}

	/* All cache objects now have an extra reference from the
	 * allocation. Drop it. */
	free_caches(file, preload, nblocks);

	dump_blocks(file);
	return 0;
}
/* Shorten a block at beginning and enlarge the preceding block.
 *
 * Re-allocate at most @len bytes from the beginning of @block to the
 * end of the preceding block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_begin(struct file_priv *file, struct hed_block *block,
		size_t len, long state)
{
	struct hed_block *prev = prev_block(block);
	size_t maxgrow;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The previous block must exist: */
	if (hed_block_is_eof(prev))
		return 0;

	/* The block flags must match the requested @state: */
	if ((prev->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (prev->phys_pos + prev->t.size != block->phys_pos)
		return 0;

	/* Append less bytes than requested if not all are available */
	assert(prev->t.size <= prev->dataobj->size - prev->dataoff);
	maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
	if (len > maxgrow)
		len = maxgrow;
	if (!len)
		return 0;

	BDEBUG("Appending 0:%lx to the previous block\n", len);

	/* Move cursors away from the to-be-chopped beginning */
	move_cursors(block, prev, 0, len - 1, prev->t.size);

	/* Enlarge the previous block */
	prev->t.size += len;
	recalc_block_recursive(prev);

	/* Shorten the original block */
	block->t.size -= len;
	block->dataoff += len;
	block->phys_pos += len;
	recalc_block_recursive(block);
	return len;
}
/* Shorten a block at end and enlarge the following block.
 *
 * Re-allocate at most @len bytes from the end of @block to the
 * beginning of the following block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * @state selects which block flags the following block must have
 * (must not include HED_BLOCK_VIRTUAL).
 * Returns: the number of bytes actually moved (0 if nothing moved).
 */
static size_t
shrink_at_end(struct file_priv *file, struct hed_block *block,
	      size_t len, long state)
{
	struct hed_block *next = next_block(block);
	hed_uoff_t off;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The next block must exist: */
	if (hed_block_is_eof(block))
		return 0;

	/* The block flags must match the requested @state: */
	if ((next->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: blocks must be physically
	 * contiguous. */
	if (block->phys_pos + block->t.size != next->phys_pos)
		return 0;

	/* Prepend less bytes than requested if not all are available
	 * before the next block's data offset. */
	if (len > next->dataoff)
		len = next->dataoff;
	if (!len)
		return 0;
	off = block->t.size - len;

	BDEBUG("Prepending %llx:%lx to the next block\n", off, len);

	/* Shift cursors in the new physical block */
	update_cursors(next, next, len);

	/* Move cursors away from the to-be-chopped end */
	move_cursors(block, next, off, UOFF_MAX, -off);

	/* Enlarge the next block */
	next->dataoff -= len;
	next->phys_pos -= len;
	next->t.size += len;
	recalc_block_recursive(next);

	/* Shorten the original block */
	block->t.size -= len;
	recalc_block_recursive(block);
	return len;
}
/* Search for an existing data object within the same physical block
 * as @curs, and having the given @state flags.
 * Scans the block list backwards then forwards from the cursor's block,
 * stopping as soon as the physical block boundary is crossed.
 * Returns the data object (borrowed reference) or NULL if none found.
 */
static struct hed_block_data *
search_data(struct file_priv *file, const hed_cursor_t *curs, long state)
{
	struct hed_block *block;
	hed_uoff_t physpos;

	/* Round down to the start of the containing physical block. */
	physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
	BDEBUG("Search for already loaded data at %llx starting at %llx...",
	       physpos, curs->block->phys_pos);

	/* Search backwards */
	block = curs->block;
	while (!hed_block_is_eof(block = prev_block(block))) {
		if (block->phys_pos < physpos)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	/* Search forwards */
	block = curs->block;
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (block->phys_pos >= physpos + FILE_BLOCK_SIZE)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	BDEBUG(" not found\n");
	return NULL;
}
/* Replace the chunk at @curs with a new physical block backed by the
 * already-loaded data object @data (takes part of the same physical
 * file block). Returns 0 on success, -1 on allocation failure.
 */
static int
reuse_loaded_data(struct file_priv *file, const hed_cursor_t *curs,
		  struct hed_block_data *data)
{
	struct hed_block *physblock;
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t physpos = block->phys_pos + block_offset;
	size_t part = FILE_BLOCK_OFF(physpos);
	/* Cover as much as possible, bounded by the physical block end
	 * and by the end of the current (virtual) block. */
	size_t len =
		FILE_BLOCK_SIZE - part <= block->t.size - block_offset
		? FILE_BLOCK_SIZE - part
		: block->t.size - block_offset;

	/* Extend the new block backwards over the part of the physical
	 * block that also lies inside the current block. */
	if (part > block_offset)
		part = block_offset;
	physpos -= part;
	len += part;
	block_offset -= part;

	if (! (physblock = new_data_block(file, physpos, len, data)) )
		return -1;

	BDEBUG("Add physical block at %llx, length %llx\n",
	       physblock->phys_pos, physblock->t.size);
	if (replace_chunk(file, block, block_offset, physblock)) {
		file_free_block(file, physblock);
		return -1;
	}

	dump_blocks(file);
	return 0;
}
/* Replace a part of a virtual block with content loaded
 * from disk. The amount of data loaded from the disk depends
 * on various factors with the goal to choose the most efficient
 * ratio. The only guarantee is that the byte at @curs will
 * be in a non-virtual block when this function returns 0.
 * Returns 0 on success, -1 on failure.
 */
static int
devirtualize_clean(struct file_priv *file, const hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block_data *data;

	BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
	       block_offset(block), block->t.size);
	assert(hed_block_is_virtual(block));

	/* Check if we can combine with a neighbouring block: if a
	 * neighbour absorbs the range covering @curs, we are done. */
	if (shrink_at_begin(file, block,
			    hed_block_size(block), 0) > block_offset
	    || shrink_at_end(file, block,
			     hed_block_size(block), 0) >= remain) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Check if the block is already loaded elsewhere; otherwise
	 * read it in from disk. */
	data = search_data(file, curs, 0);
	return data
		? reuse_loaded_data(file, curs, data)
		: load_blocks(file, curs);
}
/* Unless the block at @curs is already dirty, replace at most
 * @len bytes at @curs with a newly allocated out-of-cache block.
 * The block is marked dirty and its data is left uninitialized.
 * Note that this function may devirtualize less than @len bytes.
 * In the worst case only 1 byte at @curs will be available.
 * Returns 0 on success, -1 on allocation failure.
 */
static int
prepare_modify(struct file_priv *file, hed_cursor_t *curs, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block *newblock;

	/* Already writable; nothing to do. */
	if (hed_block_is_dirty(block))
		return 0;

	if (len > remain)
		len = remain;

	BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
	       block_offset, len,
	       block_offset(block), block->t.size);

	/* Check if we can combine with a neighbouring dirty block */
	if ((block_offset == 0 &&
	     shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
	    (remain == len &&
	     shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Initialize a new block */
	newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
	if (!newblock)
		goto out_err;

	/* Allocate data: reuse an existing dirty data object for this
	 * physical block if there is one, else allocate a fresh one. */
	if ( (newblock->dataobj = search_data(file, curs, HED_BLOCK_DIRTY)) )
		cache_get(newblock->dataobj);
	else if (! (newblock->dataobj = block_data_new(file->cache,
						       FILE_BLOCK_SIZE)) )
		goto out_err_free;

	newblock->phys_pos = block->phys_pos + block_offset;
	newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
	/* Never span a physical block boundary. */
	if (len > FILE_BLOCK_SIZE - newblock->dataoff)
		len = FILE_BLOCK_SIZE - newblock->dataoff;
	newblock->t.size = len;

	if (replace_chunk(file, block, block_offset, newblock))
		goto out_err_free;

	dump_blocks(file);
	return 0;

out_err_free:
	file_free_block(file, newblock);
out_err:
	return -1;
}
1515 /* Ensure that @curs points to an up-to-date non-virtual block.
1516 * Load the data from disk if necessary, return zero on failure. */
1517 size_t
1518 hed_prepare_read(struct hed_file *f, const hed_cursor_t *curs, size_t len)
1520 struct file_priv *file = file_private(f);
1521 struct hed_block *block = curs->block;
1522 if (hed_block_is_inner_virtual(block) &&
1523 devirtualize_clean(file, curs) < 0)
1524 return 0;
1526 return hed_cursor_chunk_len(curs, len);
1529 /* Move the block pointer to the next block */
1530 static struct hed_block *
1531 cursor_next_block(hed_cursor_t *curs)
1533 struct hed_block *block = next_nonzero_block(curs->block);
1535 if (block) {
1536 curs->block = block;
1537 curs->off = 0;
1538 list_move(&curs->list, &block->refs);
1540 return block;
1543 /* This is an optimized way of doing:
1545 * hed_move_relative(curs, curs->block->t.size);
1547 * for the case when curs->off == 0.
1549 static struct hed_block *
1550 move_to_next(hed_cursor_t *curs)
1552 curs->pos += hed_block_size(curs->block);
1553 return cursor_next_block(curs);
/* Copy in @count bytes from @pos.
 * Returns the number of bytes that were not read (i.e. zero on success).
 * The @pos cursor is moved by the amount of data read.
 * Virtual blocks read as zeroes.
 * CAUTION: If you read up to MAX_OFF, then @pos points one byte
 * beyond the EOF block upon return.
 */
static size_t
copy_in(struct file_priv *file, void *buf, size_t count, hed_cursor_t *pos)
{
	size_t cpylen;

	/* Optimistically advance the logical position; the shortfall
	 * (if any) is subtracted again at the end. */
	pos->pos += count;
	while (count && (cpylen = hed_prepare_read(&file->f, pos, count))) {
		if (hed_block_is_virtual(pos->block))
			memset(buf, 0, cpylen);
		else
			memcpy(buf, hed_cursor_data(pos), cpylen);

		buf += cpylen;
		count -= cpylen;
		/* Step to the next block when this one is exhausted. */
		if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
			if (!cursor_next_block(pos))
				break;
	}
	pos->pos -= count;
	return count;
}
1584 size_t
1585 hed_file_cpin(struct hed_file *file, void *buf, size_t count,
1586 const hed_cursor_t *pos)
1588 hed_cursor_t mypos;
1589 size_t ret;
1591 hed_dup_cursor(pos, &mypos);
1592 ret = copy_in(file_private(file), buf, count, &mypos);
1593 put_cursor(&mypos);
1594 return ret;
/* Set the modified flag on the public file object. */
static inline void
set_modified(struct file_priv *file)
{
	file->f.modified = true;
}
/* Clear the modified flag on the public file object. */
static inline void
clear_modified(struct file_priv *file)
{
	file->f.modified = false;
}
/* Overwrite one byte at @curs with @byte, marking the file modified.
 * The file size grows if the cursor is at or past the current EOF.
 * Returns 0 on success, -1 if the byte could not be made writable. */
int
hed_file_set_byte(struct hed_file *f, hed_cursor_t *curs, unsigned char byte)
{
	struct file_priv *file = file_private(f);
	hed_uoff_t offset = curs->pos;

	if (prepare_modify(file, curs, 1))
		return -1;
	set_modified(file);

	if (offset >= file->f.size)
		file->f.size = offset + 1;

	hed_block_data(curs->block)[curs->off] = byte;
	return 0;
}
/* Overwrite @len bytes at @curs with the contents of @buf.
 * The cursor is advanced past the written data; the file size grows
 * if the write extends past EOF.
 * Returns the number of bytes NOT written (0 on success). */
size_t
hed_file_set_block(struct hed_file *f, hed_cursor_t *curs,
		   unsigned char *buf, size_t len)
{
	struct file_priv *file = file_private(f);
	while (len) {
		size_t span;

		if (prepare_modify(file, curs, len))
			break;
		set_modified(file);

		/* Write as much as fits in the current chunk. */
		span = hed_cursor_chunk_len(curs, len);
		memcpy(hed_cursor_data(curs), buf, span);
		buf += span;
		len -= span;
		move_rel_fast(curs, span);
	}
	if (curs->pos > file->f.size)
		file->f.size = curs->pos;

	return len;
}
/* Overwrite @rep bytes at @curs with the single byte value @byte
 * (memset-like fill). The cursor is advanced past the filled range.
 * Returns the number of bytes NOT written (0 on success). */
hed_uoff_t
hed_file_set_bytes(struct hed_file *f, hed_cursor_t *curs,
		   unsigned char byte, hed_uoff_t rep)
{
	struct file_priv *file = file_private(f);
	while (rep) {
		size_t span;

		if (prepare_modify(file, curs, rep))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(curs, rep);
		memset(hed_cursor_data(curs), byte, span);
		rep -= span;
		move_rel_fast(curs, span);
	}
	if (curs->pos > file->f.size)
		file->f.size = curs->pos;

	return rep;
}
/* Erase @len bytes at @curs that lie within a single block.
 * Cursors inside the erased range are moved to the new position.
 * Returns 0 on success, -1 if a required block split fails. */
static int
file_erase_continuous(struct file_priv *file, hed_cursor_t *curs, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;

	/* Find the new position */
	hed_move_relative(curs, len);

	/* Move all other cursors in the erased range to the new position */
	assert(len > 0);
	move_cursors_abs(block, block_offset,
			 block_offset + len - 1, curs);

	if (!block_offset) {
		/* Erase from the block's beginning. */
		block->dataoff += len;
		if (!hed_block_is_inserted(block))
			block->phys_pos += len;
	} else if (block_offset + len < block->t.size) {
		/* Erase in the middle: split and shorten the tail. */
		block = split_block(file, block, block_offset + len);
		if (!block)
			return -1;
	}

	move_cursors(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);

	block->t.size -= len;
	recalc_block_recursive(block);

	kill_block_if_empty(file, block);
	return 0;
}
/* Erase up to @len bytes at @curs, clamped to the current file size.
 * The logical file shrinks and the EOF block grows accordingly.
 * Returns the number of bytes NOT erased (0 on success). */
size_t
hed_file_erase_block(struct hed_file *f, hed_cursor_t *curs, hed_uoff_t len)
{
	struct file_priv *file = file_private(f);
	hed_uoff_t offset;
	hed_uoff_t todo;

	offset = curs->pos;
	if (offset > hed_file_size(&file->f))
		return 0;
	else if (len > hed_file_size(&file->f) - offset)
		len = hed_file_size(&file->f) - offset;

	todo = len;
	while (todo) {
		size_t span = hed_cursor_chunk_len(curs, todo);

		if (file_erase_continuous(file, curs, span))
			break;

		todo -= span;
	}
	len -= todo;

	file->f.size -= len;
	set_modified(file);

	/* The erased bytes become part of the trailing EOF block. */
	file->eof_block.t.size += len;
	recalc_block_recursive(&file->eof_block);

	/* Shift cursors behind the erased range backwards. */
	struct hed_block *slideblock = prev_block(curs->block);
	if (hed_block_is_eof(slideblock))
		slideblock = curs->block;
	slide_cursors(slideblock, curs->pos, -len);

	return todo;
}
/* Start an insert operation at @curs: create an empty dirty insert
 * block there and initialize @curs_ins to point into it.
 * Returns 0 on success, -1 on allocation/split failure. */
int
hed_file_insert_begin(struct hed_file *f, const hed_cursor_t *curs,
		      hed_cursor_t *curs_ins)
{
	struct file_priv *file = file_private(f);
	struct hed_block *newblock;

	BDEBUG("Starting insert at %llx\n", curs->pos);

	newblock = new_block(file,
			     HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
	if (!newblock)
		return -1;

	newblock->phys_pos = hed_cursor_phys_pos(curs);
	newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
	if (!newblock->dataobj) {
		file_free_block(file, newblock);
		return -1;
	}

	/* Split the target block if inserting in its middle. */
	if (curs->off && !split_block(file, curs->block, curs->off)) {
		file_free_block(file, newblock);
		return -1;
	}

	chain_block_before(hed_file_blocks(file), newblock, curs->block);

	curs_ins->pos = curs->pos;
	curs_ins->off = newblock->t.size;
	curs_ins->block = newblock;
	list_add(&curs_ins->list, &newblock->refs);

	dump_blocks(file);
	return 0;
}
/* Finish an insert operation: detach @curs_ins from the insert block
 * and shrink (or drop, if empty) the block's data object. */
void
hed_file_insert_end(struct hed_file *f, hed_cursor_t *curs_ins)
{
	struct file_priv *file = file_private(f);
	struct hed_block *block = curs_ins->block;

	BDEBUG("End insert at %llx\n", curs_ins->pos);

	curs_ins->block = NULL;
	list_del(&curs_ins->list);
	if (!kill_block_if_empty(file, block))
		/* Trim the data object to the bytes actually used. */
		block_data_shrink(file->cache, block->dataobj,
				  block->dataoff + block->t.size);

	dump_blocks(file);
}
/* Append @len bytes from @buf at @curs into the current insert block.
 * The block must be out-of-cache; it is marked dirty and grown, the
 * cursor and trailing cursors are advanced, and the file size updated.
 * The caller guarantees @len fits in the block's data object. */
static void
insert_block(struct file_priv *file, hed_cursor_t *curs,
	     unsigned char *buf, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t offset = curs->pos;

	/* NOTE(review): offset is unsigned, so `offset >= 0` is always
	 * true; the assert only really checks @file. */
	assert(file && offset >= 0);

	assert(hed_block_is_excache(block));
	hed_block_set_dirty(block);
	set_modified(file);

	memcpy(hed_block_data(block) + curs->off, buf, len);
	block->t.size += len;
	recalc_block_recursive(block);
	curs->off += len;
	curs->pos += len;

	if (curs->pos > file->f.size)
		file->f.size = curs->pos;
	else
		file->f.size += len;

	/* Everything behind the insert point slides forward. */
	slide_cursors(next_block(block), offset, len);
}
/* Insert @len bytes from @buf at @curs, opening additional insert
 * blocks whenever the current one's data object is full.
 * Returns the number of bytes NOT inserted (0 on success). */
size_t
hed_file_insert_block(struct hed_file *f, hed_cursor_t *curs,
		      unsigned char *buf, size_t len)
{
	struct file_priv *file = file_private(f);
	while (len) {
		struct hed_block *block = curs->block;
		size_t remain = block->dataobj->size - curs->off;

		if (!remain) {
			/* Current insert block is full: try to start a
			 * fresh insert block after it. */
			list_del(&curs->list);
			curs->block = next_block(block);
			curs->off = 0;

			if (!hed_file_insert_begin(&file->f, curs, curs))
				continue;

			/* Failed: restore the cursor and give up. */
			curs->block = block;
			curs->off = block->t.size;
			list_add(&curs->list, &block->refs);
			break;
		}

		if (remain > len)
			remain = len;

		BDEBUG("Append %ld bytes to the insert block\n",
		       (long) remain);
		insert_block(file, curs, buf, remain);
		buf += remain;
		len -= remain;
	}
	return len;
}
/* Insert a single byte at @curs.
 * Returns non-zero if the byte could not be inserted. */
int
hed_file_insert_byte(struct hed_file *file, hed_cursor_t *curs,
		     unsigned char byte)
{
	return hed_file_insert_block(file, curs, &byte, 1);
}
/* Perform a complete one-shot insert of @len bytes at @curs
 * (begin + block insert + end in one call).
 * Returns the number of bytes NOT inserted (0 on success). */
size_t
hed_file_insert_once(struct hed_file *file, hed_cursor_t *curs,
		     unsigned char *buf, size_t len)
{
	hed_cursor_t insert;

	if (!hed_file_insert_begin(file, curs, &insert)) {
		len = hed_file_insert_block(file, &insert, buf, len);
		hed_file_insert_end(file, &insert);
	}
	return len;
}
/* State shared by the commit (write-back) machinery. */
struct commit_control {
	struct file_priv *file;
	int wfd;		/* file descriptor for writing */
	int needwrite;		/* non-zero if write is needed */
	hed_cursor_t begoff, endoff; /* current write window */
	hed_off_t shift;	/* logical<->physical shift (see get_shift) */
	void *partbuf;		/* allocated 3*FILE_BLOCK_SIZE */
	void *partial;		/* pointer into partbuf */
};
/* Get the logical<->physical shift value after the specified block.
 * It is essential to get the value _AFTER_ the block, because the
 * shift value is used to decide how the current block will be handled.
 * Inserted blocks contribute their whole size to the shift.
 */
static hed_off_t
get_shift(const hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	size_t curshift = hed_block_is_inserted(block) ? block->t.size : 0;
	return curshift + curs->pos - curs->off - block->phys_pos;
}
/* Advance cc->partial to the next of the three FILE_BLOCK_SIZE
 * sub-buffers inside cc->partbuf, wrapping around at the end.
 * (Relies on GCC's void-pointer arithmetic extension.) */
static void
switch_partial(struct commit_control *cc)
{
	cc->partial += FILE_BLOCK_SIZE;
	if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
		cc->partial = cc->partbuf;
}
1912 /* Write @writelen bytes from the partial buffer at @cc->begoff. */
1913 static int
1914 commit_block(struct commit_control *cc, size_t len)
1916 ssize_t written;
1918 assert(len > 0);
1919 BDEBUG(" -> write %lx bytes at %llx\n",
1920 (unsigned long)len, cc->begoff.pos - len);
1921 written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
1922 if (written < len)
1923 /* TODO: keep data in a new list of dirty blocks */
1924 return -1;
1925 return 0;
/* Fill the current partial buffer from cc->begoff (advancing it) and
 * flush the buffer with commit_block() once a physical block boundary
 * or EOF is reached. Returns 0 on success or nothing-to-do, -1 on error. */
static int
commit_partial(struct commit_control *cc)
{
	size_t partoff, remain, left;
	size_t writelen;

	partoff = FILE_BLOCK_OFF(cc->begoff.pos);
	remain = FILE_BLOCK_SIZE - partoff;
	if (remain > cc->endoff.pos - cc->begoff.pos)
		remain = cc->endoff.pos - cc->begoff.pos;
	if ((writelen = partoff + remain) == 0)
		return 0;

	BDEBUG("Fill partial %llx-%llx\n",
	       cc->begoff.pos, cc->begoff.pos + remain);

	left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
	if (left) {
		/* Skip the unreadable bytes so the caller can make
		 * progress, but report the failure. */
		hed_move_relative(&cc->begoff, left);
		return -1;
	}

	/* Keep buffering until a block boundary (or EOF) is reached. */
	if (FILE_BLOCK_OFF(cc->begoff.pos) &&
	    !hed_block_is_eof(cc->begoff.block))
		return 0;

	return commit_block(cc, writelen);
}
/* Commit forwards: write out [cc->begoff, cc->endoff) in ascending
 * order (safe when data shifts towards lower offsets).
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_forwards(struct commit_control *cc)
{
	hed_uoff_t endpos = cc->endoff.pos;
	int ret = 0;

	BDEBUG("Writing forwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	while (cc->begoff.pos < endpos)
		ret |= commit_partial(cc);

	return ret;
}
/* Commit backwards: write out [cc->begoff, cc->endoff) starting with
 * the last physical block (safe when data shifts towards higher
 * offsets, so later blocks must be saved before being overwritten).
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_backwards(struct commit_control *cc)
{
	void *retpartial = cc->partial;
	hed_uoff_t begpos = cc->begoff.pos;
	hed_uoff_t blkpos;	/* start of current partial block */
	int ret = 0;

	BDEBUG("Writing backwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
	if (blkpos <= begpos)
		goto final;

	/* Handle the trailing partial block */
	hed_update_cursor(&cc->file->f, blkpos, &cc->begoff);
	switch_partial(cc);
	ret |= commit_partial(cc);
	retpartial = cc->partial;

	/* Handle the middle part */
	switch_partial(cc);
	while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
		hed_update_cursor(&cc->file->f, blkpos, &cc->begoff);
		ret |= commit_partial(cc);
	}
	switch_partial(cc);	/* wrap around */

final:
	/* Handle the first block (partial or not) */
	hed_update_cursor(&cc->file->f, begpos, &cc->begoff);
	ret |= commit_partial(cc);

	cc->partial = retpartial;
	return ret;
}
/* Handle the partial block before a skipped one: flush any pending
 * partial data so that a run of unchanged physical blocks can be
 * skipped entirely. Clears cc->needwrite on success. */
static int
begin_skip(struct commit_control *cc)
{
	size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
	size_t remain;
	int ret = 0;

	/* Check if at least one complete physical block can be skipped */
	if (cc->endoff.block->t.size < minsize)
		return 0;

	/* Write out the partially dirty block */
	remain = FILE_BLOCK_OFF(minsize);
	hed_move_relative(&cc->endoff, remain);
	if (cc->shift <= 0)
		ret |= commit_forwards(cc);
	else
		ret |= commit_backwards(cc);
	hed_move_relative(&cc->endoff, -(hed_off_t)remain);
	hed_dup2_cursor(&cc->endoff, &cc->begoff);

	cc->needwrite = 0;
	return ret;
}
/* Handle the last partially skipped physical block: pre-read the part
 * of the block before the write window so commit can write whole
 * physical blocks. Sets cc->needwrite. */
static int
end_skip(struct commit_control *cc)
{
	size_t partlen;
	int ret = 0;

	/* Find the beginning of the physical block */
	hed_dup2_cursor(&cc->endoff, &cc->begoff);
	partlen = FILE_BLOCK_OFF(cc->begoff.pos);
	hed_move_relative(&cc->begoff, -(hed_off_t)partlen);

	/* Read the partial data before this block */
	if (hed_file_cpin(&cc->file->f, cc->partial, partlen, &cc->begoff))
		ret = -1;

	cc->needwrite = 1;
	return ret;
}
/* After a successful commit, turn every block back into a clean
 * virtual block (dropping cached data) and renumber physical
 * positions sequentially; then sweep the list again to let
 * file_kill_block() dispose of/merge the now-virtual blocks. */
static void
undirty_blocks(struct file_priv *file)
{
	struct hed_block *block, *next;
	hed_uoff_t pos = 0;

	BDEBUG("Undirtying blocks:\n");
	dump_blocks(file);

	next = first_block(file);
	while (!hed_block_is_eof(next)) {
		block = next;
		next = next_block(block);

		if (kill_block_if_empty(file, block))
			continue;

		if (!hed_block_is_virtual(block)) {
			/* Drop the backing data; the block becomes a
			 * plain virtual placeholder. */
			cache_put(file->cache, block->dataobj);
			block->dataobj = NULL;
			list_del_init(&block->lru);
			block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
		}

		/* Physical layout now matches logical layout. */
		block->phys_pos = pos;
		pos += block->t.size;
	}

	/* Second pass: release all blocks.  NOTE(review): presumably
	 * file_kill_block() merges the virtual ranges into the EOF
	 * block — confirm against its definition. */
	block = first_block(file);
	while (!hed_block_is_eof(block)) {
		next = next_block(block);
		file_kill_block(file, block);
		block = next;
	}

	BDEBUG("After undirtying\n");
	dump_blocks(file);
}
/* Prepare a commit: allocate the triple partial buffer and open the
 * file for writing (creating it if there is no read fd yet).
 * Returns 0 on success, -1 on failure (nothing left allocated/open). */
static int
commit_init(struct commit_control *cc, struct file_priv *file)
{
	cc->file = file;

	cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
	if (!cc->partbuf)
		goto err;

	cc->wfd = open(file->f.name,
		       O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
	if (cc->wfd < 0)
		goto err_free;

	/* If the file did not exist before, also open it for reading. */
	if (file->fd < 0 &&
	    (file->fd = open(file->f.name, O_RDONLY)) < 0)
		goto err_close;

	return 0;

err_close:
	close(cc->wfd);
err_free:
	free(cc->partbuf);
err:
	return -1;
}
/* Write all modifications back to the underlying file, choosing for
 * each range whether to copy forwards or backwards depending on the
 * logical<->physical shift, then reset all blocks to clean virtual
 * state. Returns 0 on success, non-zero if any write failed. */
int
hed_file_commit(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	struct commit_control cc;
	int ret = 0;

	if (commit_init(&cc, file))
		return -1;

	dump_blocks(file);

	cc.partial = cc.partbuf;
	get_cursor(file, 0,&cc.begoff);
	hed_dup_cursor(&cc.begoff, &cc.endoff);
	cc.shift = -cc.begoff.block->phys_pos;
	cc.needwrite = 0;
	while(!hed_block_is_eof(cc.endoff.block)) {
		/* Shift after the current block; 0 past phys EOF. */
		hed_off_t newshift = cc.endoff.pos < file->f.phys_size
			? get_shift(&cc.endoff)
			: 0;

		/* A sign change of the shift forces a flush of the
		 * accumulated range in the (now wrong) direction. */
		if (cc.shift <= 0 && newshift > 0) {
			ret |= commit_forwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		} else if (cc.shift > 0 && newshift <= 0) {
			ret |= commit_backwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		}
		cc.shift = newshift;

		/* Unshifted clean blocks need not be rewritten. */
		if (!newshift && !hed_block_is_dirty(cc.endoff.block)) {
			if (cc.needwrite)
				ret |= begin_skip(&cc);
		} else if (!cc.needwrite)
			ret |= end_skip(&cc);

		if (!move_to_next(&cc.endoff))
			break;
	}
	assert(cc.endoff.pos == hed_file_size(&file->f));

	/* Flush the final pending range. */
	if (cc.begoff.pos < hed_file_size(&file->f)) {
		if (cc.shift <= 0)
			ret |= commit_forwards(&cc);
		else
			ret |= commit_backwards(&cc);
	}

	put_cursor(&cc.begoff);
	put_cursor(&cc.endoff);

	/* NOTE(review): ftruncate() return value is ignored here —
	 * a failed truncation would go unreported. */
	ftruncate(cc.wfd, hed_file_size(&file->f));
	file->f.phys_size = hed_file_size(&file->f);

	ret |= close(cc.wfd);
	free(cc.partbuf);

	undirty_blocks(file);

	if (!ret)
		clear_modified(file);

	return ret;
}
#ifdef HED_CONFIG_SWAP

/* Write the current editing state to the swap file.
 * Returns the result of swp_write() (0 on success). */
int
hed_file_write_swap(struct hed_file *file)
{
	return swp_write(file_swp(file_private(file)));
}
2208 static int
2209 do_read_swap(struct file_priv *file, struct swp_file *swp, hed_cursor_t *pos)
2211 struct file_priv *swpfile = swp_private(swp);
2212 struct hed_block *cur, block;
2213 hed_uoff_t phys_pos;
2215 if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
2216 file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
2217 fprintf(stderr, "stat info mismatch (you modified the file since hed ran on it; refusing to touch it)\n");
2218 return -1;
2221 BDEBUG("Swap header match\n");
2223 phys_pos = 0;
2224 cur = first_block(swpfile);
2225 do {
2226 struct hed_block_data dataobj;
2227 size_t (*mergefn)(struct hed_file*, hed_cursor_t*,
2228 unsigned char*, size_t);
2229 void *data;
2230 size_t res;
2232 if (swp_cpin(swp, &block, cur, sizeof(struct hed_block))) {
2233 perror("Cannot read block descriptor");
2234 return -1;
2236 BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
2237 cur, block.flags, (long long)block.phys_pos,
2238 (long long)hed_block_size(&block));
2240 if (block.phys_pos - phys_pos) {
2241 if (hed_file_erase_block(&file->f, pos,
2242 block.phys_pos - phys_pos)) {
2243 perror("Cannot erase");
2244 return -1;
2246 phys_pos = block.phys_pos;
2249 if (!hed_block_is_inserted(&block))
2250 phys_pos += hed_block_size(&block);
2252 if (!hed_block_is_dirty(&block)) {
2253 hed_move_relative(pos, hed_block_size(&block));
2254 continue;
2257 if (swp_cpin(swp, &dataobj, block.dataobj,
2258 sizeof(struct hed_block_data))) {
2259 perror("Cannot read data descriptor");
2260 return -1;
2262 BDEBUG("DATA %p: size 0x%lx\n",
2263 block.dataobj, (long)dataobj.size);
2265 if (! (data = malloc(hed_block_size(&block))) ) {
2266 perror("Cannot allocate data");
2267 return -1;
2270 if (swp_cpin(swp, data, dataobj.data + block.dataoff,
2271 hed_block_size(&block))) {
2272 perror("Cannot read data");
2273 return -1;
2276 mergefn = hed_block_is_inserted(&block)
2277 ? hed_file_insert_once
2278 : hed_file_set_block;
2279 res = mergefn(&file->f, pos, data, hed_block_size(&block));
2280 free(data);
2281 if (res) {
2282 perror("Cannot merge data");
2283 return -1;
2285 } while (cur = next_block(&block), !hed_block_is_eof(&block));
2287 dump_blocks(file);
2288 return 0;
/* Restore editing state from the swap file associated with @f.
 * Returns 0 on success, -1 if the swap cannot be opened or replayed. */
int
hed_file_read_swap(struct hed_file *f)
{
	struct file_priv *file = file_private(f);
	struct swp_file *swp;
	hed_cursor_t pos;
	int ret;

	if (! (swp = swp_init_read(file->swpname)) )
		return -1;

	get_cursor(file, 0, &pos);
	ret = do_read_swap(file, swp, &pos);
	put_cursor(&pos);

	swp_done(swp);
	return ret;
}
#else

/* Swap support compiled out: writing swap is a successful no-op... */
int
hed_file_write_swap(struct hed_file *file)
{
	return 0;
}

/* ...and reading swap always fails. */
int
hed_file_read_swap(struct hed_file *file)
{
	return -1;
}

#endif	/* HED_CONFIG_SWAP */
/* Reverse @len bytes starting at @p in place. */
static void
reverse(unsigned char *p, size_t len)
{
	size_t i, j;

	for (i = 0, j = len; i < j--; ++i) {
		unsigned char tmp = p[i];
		p[i] = p[j];
		p[j] = tmp;
	}
}
/* Fill the Boyer-Moore bad-character table for needle @s of length
 * @len.  Entries default to 0 (caller zero-initializes); for every
 * byte except the last, the table records its rightmost 1-based
 * position among the first len-1 characters. */
static void
compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
{
	ssize_t i;

	for (i = 1; i < len; ++i)
		badchar[s[i - 1]] = i;
}
/* Compute the Boyer-Moore suffix table: sfx[i] is the length of the
 * longest substring of @s ending at position i that is also a suffix
 * of @s.  @sfx must hold @len entries. */
static void
compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
{
	ssize_t f = 0, g, i;

	sfx[len - 1] = len;
	g = len - 1;
	for (i = len - 2; i >= 0; --i) {
		/* Short-circuit order matters: f/sfx[] are only valid
		 * once a previous match window has been established. */
		if (i <= g || sfx[i + len - 1 - f] >= i - g) {
			/* Extend a fresh match window ending at i. */
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && s[g] == s[g + len - 1 - f])
				--g;
			sfx[i] = f - g;
		} else {
			/* Reuse the previously computed value. */
			sfx[i] = sfx[i + len - 1 - f];
		}
	}
}
/* Fill @goodsfx with the Boyer-Moore good-suffix shift table for @s.
 * The buffer must have room for 2*len entries: the first len hold the
 * shifts, the second len are scratch for the suffix table (this layout
 * matches the allocation in find_bytestr()). */
static void
compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
{
	ssize_t i, j, *sfx = goodsfx + len;

	compute_sfx(sfx, s, len);

	/* Default shift: the whole needle length. */
	for (i = 0; i < len; ++i)
		goodsfx[i] = len;
	j = 0;
	/* Case 1: the matched suffix reoccurs as a prefix of @s. */
	for (i = len - 1; i >= 0; --i)
		if (sfx[i] == i + 1)
			for (; j < len - 1 - i; ++j)
				if (goodsfx[j] == len)
					goodsfx[j] = len - 1 - i;
	/* Case 2: the matched suffix reoccurs elsewhere in @s. */
	for (i = 0; i <= len - 2; ++i)
		goodsfx[len - 1 - sfx[i]] = len - 1 - i;
}
/* Search forwards in @buf (length @buflen) for the needle whose last
 * index is @maxidx (i.e. length maxidx+1), using the Boyer-Moore
 * @badchar and @goodsfx shift tables.  Returns a pointer to the first
 * match or NULL. */
static inline unsigned char*
search_buf(unsigned char *buf, size_t buflen, unsigned char *needle,
	   size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	/* Single-byte needles degenerate to a plain byte search. */
	if (!maxidx)
		return memchr(buf, *needle, buflen);

	while (buflen > maxidx) {
		ssize_t i = maxidx;
		ssize_t shift;

		/* Compare right-to-left against the window start. */
		while (i >= 0 && needle[i] == buf[i])
			--i;
		if (i < 0)
			return buf;

		/* Advance by the larger of the two BM shift rules. */
		shift = i + 1 - badchar[buf[i]];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf += shift;
		buflen -= shift;
	}
	return NULL;
}
/* Search for a constant byte string backwards.
 * The needle is stored reversed (see find_bytestr), so needle[maxidx]
 * down to needle[0] is compared against ascending buffer positions.
 * Returns a pointer to the last match in @buf or NULL. */
static inline unsigned char*
search_buf_rev(unsigned char *buf, size_t buflen, unsigned char *needle,
	       size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	if (!maxidx)
		return memrchr(buf, *needle, buflen);

	/* Start with the rightmost possible window. */
	buf += buflen - maxidx - 1;
	while (buflen > maxidx) {
		unsigned char *p;
		size_t i;
		ssize_t shift;

		/* Compare left-to-right (needle is reversed). */
		for (p = buf, i = maxidx; p <= buf + maxidx; ++p, --i)
			if (needle[i] != *p)
				break;
		if (p > buf + maxidx)
			return buf;

		shift = i + 1 - badchar[*p];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf -= shift;
		buflen -= shift;
	}
	return NULL;
}
/* Search for a constant byte string using the Boyer-Moore algorithm.
 * @dir < 0 searches backwards from @from (the needle is reversed in
 * place for that case), otherwise forwards.  On a match, @from is left
 * at the match position and 0 is returned; otherwise @from ends at the
 * search boundary and HED_FINDOFF_NO_MATCH / HED_FINDOFF_ERROR is
 * returned.  A single scratch allocation holds the bad-character
 * table (256), the good-suffix table (2*len), and a bounce buffer of
 * 2*(len-1) bytes for matches spanning block boundaries. */
static int
find_bytestr(struct file_priv *file, hed_cursor_t *from, int dir,
	     unsigned char *needle, size_t len)
{
	void *dynalloc;
	ssize_t *badchar, *goodsfx;
	unsigned char *readbuf;
	unsigned char *p, *q;
	size_t remain;
	int ret;

	if (len > 1) {
		dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
				  + 2*(len-1), 1);
		if (!dynalloc)
			return HED_FINDOFF_ERROR;
		badchar = dynalloc;
		goodsfx = badchar + 256;
		readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);

		if (dir < 0)
			reverse(needle, len);
		compute_badchar(badchar, needle, len);
		compute_goodsfx(goodsfx, needle, len);
	} else {
		/* Single-byte needles need no tables or bounce buffer. */
		dynalloc = NULL;
		badchar = goodsfx = NULL;
		readbuf = NULL;
	}

	assert(!hed_block_is_eof(from->block));

	--len;	/* simplify offset computing */

	ret = HED_FINDOFF_NO_MATCH;
	if (dir < 0) {
		/* Backward search.  @remain is the (negated) cursor
		 * adjustment applied before each iteration. */
		remain = -len;
		while (move_rel_fast(from, -remain),
		       ret && from->pos >= len) {

			if (!hed_prepare_read(&file->f, from, SIZE_MAX)) {
				ret = HED_FINDOFF_ERROR;
				break;
			}
			remain = from->off;

			if (hed_block_is_bad(from->block)) {
				/* bad blocks cannot match anything */
				++remain;
				continue;
			}

			if (remain >= len) {
				/* Scan directly in the block data. */
				p = from->block->dataobj->data +
					from->block->dataoff;
				from->off -= remain;
				from->pos -= remain;
				++remain;
			} else {
				/* Potential match spans blocks: copy
				 * into the bounce buffer. */
				remain += len;
				if (remain > from->pos)
					remain = from->pos;
				move_rel_fast(from, -remain);
				++remain;
				if (hed_file_cpin(&file->f, readbuf,
						  remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				p = readbuf;
			}

			q = search_buf_rev(p, remain, needle, len,
					   badchar, goodsfx);
			if (q) {
				ret = 0;
				remain = p - q;
			} else
				remain = -len + 1;
		}
	} else {
		/* Forward search. */
		for ( ; ret && !hed_block_is_eof(from->block);
		     move_rel_fast(from, remain)) {

			remain = hed_prepare_read(&file->f, from, SIZE_MAX);
			if (!remain) {
				ret = HED_FINDOFF_ERROR;
				break;
			}

			if (hed_block_is_bad(from->block))
				/* bad blocks cannot match anything */
				continue;

			if (remain > len) {
				/* Scan directly in the block data. */
				p = from->block->dataobj->data +
					from->block->dataoff + from->off;
				from->off += remain;
				from->pos += remain;
			} else {
				/* Potential match spans blocks: copy
				 * into the bounce buffer (copy_in also
				 * advances the cursor). */
				remain += len;
				if (copy_in(file, readbuf, remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				p = readbuf;
			}

			q = search_buf(p, remain, needle, len,
				       badchar, goodsfx);
			if (q) {
				ret = 0;
				remain = q - p - remain;
			} else
				remain = -len;
		}
	}

	if (dynalloc)
		free(dynalloc);
	return ret;
}
/*
 * Search for the byte string produced by an expression.
 *
 * The expression is re-evaluated and compared at every candidate
 * position, which is required for dynamic expressions (those whose
 * value depends on the current position).  If the expression is not
 * dynamic (no HED_AEF_DYNAMIC flag), the constant buffer is handed
 * to find_bytestr() for a fast Boyer-Moore scan instead.
 *
 * @file: private file state; reads go through &file->f.
 * @from: cursor giving the start position; moved by one byte per probe.
 * @dir:  +1 to search forward, -1 to search backward.
 * @expr: the expression to match against the file data.
 *
 * Returns 0 on a match (cursor left at the match position),
 * HED_FINDOFF_NO_MATCH if the end of file is reached, or
 * HED_FINDOFF_ERROR on evaluation/read failure.
 */
static int
find_expr(struct file_priv *file, hed_cursor_t *from, int dir,
	  struct hed_expr *expr)
{
	size_t len = hed_expr_len(expr);
	unsigned char *buf;

	if (!len)
		return HED_FINDOFF_NO_MATCH;

	for (;;) {
		hed_cursor_t match;
		size_t remain;
		unsigned char *p;
		size_t pos;

		if (hed_expr_eval(expr) & HED_AEF_ERROR)
			return HED_FINDOFF_ERROR;
		buf = hed_expr_buf(expr);

		/* Compare len bytes at the current position using a
		 * throw-away cursor, so @from stays put for the step. */
		hed_dup_cursor(from, &match);
		remain = 0;
		for (pos = 0; pos < len; pos++) {
			if (!remain) {
				remain = hed_prepare_read(&file->f, &match,
							  SIZE_MAX);
				if (!remain ||
				    hed_block_is_bad(match.block))
					break;
				/* NOTE(review): p may be NULL here for a
				 * data-less block — the comparison below
				 * then substitutes 0; confirm against
				 * hed_cursor_data semantics. */
				p = hed_cursor_data(&match);
				cursor_next_block(&match);
			}
			if ((p ? *p++ : 0) != buf[pos])
				break;
			remain--;
		}
		put_cursor(&match);

		if (pos == len)
			return 0;
		/* A byte mismatch leaves remain >= 1 (it is decremented
		 * only after a successful compare), so remain == 0 here
		 * means the inner loop broke on a failed read or a bad
		 * block — report that as an error, not a non-match. */
		if (!remain)
			return HED_FINDOFF_ERROR;

		/* Going backward from the EOF block: snap the cursor to
		 * the block's start first, so the single-byte step below
		 * lands on the last real byte of the file. */
		if (dir < 0 && hed_block_is_eof(from->block)) {
			from->pos -= from->off;
			from->off = 0;
		}
		move_rel_fast(from, dir);
		if (hed_block_is_eof(from->block))
			break;

		/* Constant expression: one evaluation suffices, switch to
		 * the Boyer-Moore byte-string search. */
		if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
			return find_bytestr(file, from, dir, buf, len);
	}

	return HED_FINDOFF_NO_MATCH;
}
2627 hed_file_find_expr(struct hed_file *f, hed_cursor_t *pos, int dir,
2628 struct hed_expr *expr)
2630 struct file_priv *file = file_private(f);
2631 int res;
2633 assert(dir == 1 || dir == -1);
2635 set_readahead(file,
2636 dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
2637 res = find_expr(file, pos, dir, expr);
2638 set_readahead(file, HED_RA_NONE);
2640 return res;