text: simplify remapping of original file content
[vis.git] / text.c
blob5ab53b38748175b575a9e59f4c0ec766fe59aea7
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE /* memrchr(3) is non-standard */
3 #endif
4 #include <unistd.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
9 #include <fcntl.h>
10 #include <errno.h>
11 #include <wchar.h>
12 #include <stdint.h>
13 #include <libgen.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #if CONFIG_ACL
19 #include <sys/acl.h>
20 #endif
21 #if CONFIG_SELINUX
22 #include <selinux/selinux.h>
23 #endif
25 #include "text.h"
26 #include "text-util.h"
27 #include "text-motions.h"
28 #include "util.h"
30 /* Allocate blocks holding the actual file content in chunks of size: */
31 #ifndef BLOCK_SIZE
32 #define BLOCK_SIZE (1 << 20)
33 #endif
34 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
35 * directely. Hence the former can be truncated, while doing so on the latter
36 * results in havoc. */
37 #define BLOCK_MMAP_SIZE (1 << 26)
39 /* Block holding the file content, either readonly mmap(2)-ed from the original
40 * file or heap allocated to store the modifications.
42 typedef struct Block Block;
43 struct Block {
44 size_t size; /* maximal capacity */
45 size_t len; /* current used length / insertion position */
46 char *data; /* actual data */
47 enum { /* type of allocation */
48 MMAP_ORIG, /* mmap(2)-ed from an external file */
49 MMAP, /* mmap(2)-ed from a temporary file only known to this process */
50 MALLOC, /* heap allocated block using malloc(3) */
51 } type;
52 Block *next; /* next chunk */
55 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
56 * All active pieces chained together form the whole content of the document.
57 * At the beginning there exists only one piece, spanning the whole document.
58 * Upon insertion/deletion new pieces will be created to represent the changes.
59 * Generally pieces are never destroyed, but kept around to peform undo/redo
60 * operations.
62 struct Piece {
63 Text *text; /* text to which this piece belongs */
64 Piece *prev, *next; /* pointers to the logical predecessor/successor */
65 Piece *global_prev; /* double linked list in order of allocation, */
66 Piece *global_next; /* used to free individual pieces */
67 const char *data; /* pointer into a Block holding the data */
68 size_t len; /* the length in number of bytes of the data */
71 /* used to transform a global position (byte offset starting from the beginning
72 * of the text) into an offset relative to a piece.
74 typedef struct {
75 Piece *piece; /* piece holding the location */
76 size_t off; /* offset into the piece in bytes */
77 } Location;
79 /* A Span holds a certain range of pieces. Changes to the document are always
80 * performed by swapping out an existing span with a new one.
82 typedef struct {
83 Piece *start, *end; /* start/end of the span */
84 size_t len; /* the sum of the lengths of the pieces which form this span */
85 } Span;
87 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
88 typedef struct Change Change;
89 struct Change {
90 Span old; /* all pieces which are being modified/swapped out by the change */
91 Span new; /* all pieces which are introduced/swapped in by the change */
92 size_t pos; /* absolute position at which the change occured */
93 Change *next; /* next change which is part of the same revision */
94 Change *prev; /* previous change which is part of the same revision */
97 /* A Revision is a list of Changes which are used to undo/redo all modifications
98 * since the last snapshot operation. Revisions are stored in a directed graph structure.
100 typedef struct Revision Revision;
101 struct Revision {
102 Change *change; /* the most recent change */
103 Revision *next; /* the next (child) revision in the undo tree */
104 Revision *prev; /* the previous (parent) revision in the undo tree */
105 Revision *earlier; /* the previous Revision, chronologically */
106 Revision *later; /* the next Revision, chronologically */
107 time_t time; /* when the first change of this revision was performed */
108 size_t seq; /* a unique, strictly increasing identifier */
111 typedef struct {
112 size_t pos; /* position in bytes from start of file */
113 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
114 } LineCache;
116 /* The main struct holding all information of a given file */
117 struct Text {
118 Block *block; /* original file content at the time of load operation */
119 Block *blocks; /* all blocks which have been allocated to hold insertion data */
120 Piece *pieces; /* all pieces which have been allocated, used to free them */
121 Piece *cache; /* most recently modified piece */
122 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
123 Revision *history; /* undo tree */
124 Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
125 Revision *last_revision; /* the last revision added to the tree, chronologically */
126 Revision *saved_revision; /* the last revision at the time of the save operation */
127 size_t size; /* current file content size in bytes */
128 struct stat info; /* stat as probed at load time */
129 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
132 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
133 Text *txt; /* text to operate on */
134 char *filename; /* filename to save to as given to text_save_begin */
135 char *tmpname; /* temporary name used for atomic rename(2) */
136 int fd; /* file descriptor to write data to using text_save_write */
137 enum TextSaveMethod type; /* method used to save file */
140 /* block management */
141 static Block *block_alloc(Text*, size_t size);
142 static Block *block_read(Text*, size_t size, int fd);
143 static Block *block_mmap(Text*, size_t size, int fd, off_t offset);
144 static void block_free(Block*);
145 static bool block_capacity(Block*, size_t len);
146 static const char *block_append(Block*, const char *data, size_t len);
147 static bool block_insert(Block*, size_t pos, const char *data, size_t len);
148 static bool block_delete(Block*, size_t pos, size_t len);
149 static const char *block_store(Text*, const char *data, size_t len);
150 /* cache layer */
151 static void cache_piece(Text *txt, Piece *p);
152 static bool cache_contains(Text *txt, Piece *p);
153 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
154 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
155 /* piece management */
156 static Piece *piece_alloc(Text *txt);
157 static void piece_free(Piece *p);
158 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
159 static Location piece_get_intern(Text *txt, size_t pos);
160 static Location piece_get_extern(Text *txt, size_t pos);
161 /* span management */
162 static void span_init(Span *span, Piece *start, Piece *end);
163 static void span_swap(Text *txt, Span *old, Span *new);
164 /* change management */
165 static Change *change_alloc(Text *txt, size_t pos);
166 static void change_free(Change *c);
167 /* revision management */
168 static Revision *revision_alloc(Text *txt);
169 static void revision_free(Revision *rev);
170 /* logical line counting cache */
171 static void lineno_cache_invalidate(LineCache *cache);
172 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
173 static size_t lines_count(Text *txt, size_t pos, size_t len);
175 static ssize_t write_all(int fd, const char *buf, size_t count) {
176 size_t rem = count;
177 while (rem > 0) {
178 ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem);
179 if (written < 0) {
180 if (errno == EAGAIN || errno == EINTR)
181 continue;
182 return -1;
183 } else if (written == 0) {
184 break;
186 rem -= written;
187 buf += written;
189 return count - rem;
192 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
193 static Block *block_alloc(Text *txt, size_t size) {
194 Block *blk = calloc(1, sizeof *blk);
195 if (!blk)
196 return NULL;
197 if (BLOCK_SIZE > size)
198 size = BLOCK_SIZE;
199 if (!(blk->data = malloc(size))) {
200 free(blk);
201 return NULL;
203 blk->type = MALLOC;
204 blk->size = size;
205 blk->next = txt->blocks;
206 txt->blocks = blk;
207 return blk;
210 static Block *block_read(Text *txt, size_t size, int fd) {
211 Block *blk = block_alloc(txt, size);
212 if (!blk)
213 return NULL;
214 while (size > 0) {
215 char data[4096];
216 ssize_t len = read(fd, data, MIN(sizeof(data), size));
217 if (len == -1) {
218 txt->blocks = blk->next;
219 block_free(blk);
220 return NULL;
221 } else if (len == 0) {
222 break;
223 } else {
224 block_append(blk, data, len);
225 size -= len;
228 return blk;
231 static Block *block_mmap(Text *txt, size_t size, int fd, off_t offset) {
232 Block *blk = calloc(1, sizeof *blk);
233 if (!blk)
234 return NULL;
235 if (size) {
236 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
237 if (blk->data == MAP_FAILED) {
238 free(blk);
239 return NULL;
242 blk->type = MMAP_ORIG;
243 blk->size = size;
244 blk->len = size;
245 blk->next = txt->blocks;
246 txt->blocks = blk;
247 return blk;
250 static void block_free(Block *blk) {
251 if (!blk)
252 return;
253 if (blk->type == MALLOC)
254 free(blk->data);
255 else if ((blk->type == MMAP_ORIG || blk->type == MMAP) && blk->data)
256 munmap(blk->data, blk->size);
257 free(blk);
260 /* check whether block has enough free space to store len bytes */
261 static bool block_capacity(Block *blk, size_t len) {
262 return blk->size - blk->len >= len;
265 /* append data to block, assumes there is enough space available */
266 static const char *block_append(Block *blk, const char *data, size_t len) {
267 char *dest = memcpy(blk->data + blk->len, data, len);
268 blk->len += len;
269 return dest;
272 /* stores the given data in a block, allocates a new one if necessary. returns
273 * a pointer to the storage location or NULL if allocation failed. */
274 static const char *block_store(Text *txt, const char *data, size_t len) {
275 Block *blk = txt->blocks;
276 if ((!blk || !block_capacity(blk, len)) && !(blk = block_alloc(txt, len)))
277 return NULL;
278 return block_append(blk, data, len);
281 /* insert data into block at an arbitrary position, this should only be used with
282 * data of the most recently created piece. */
283 static bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
284 if (pos > blk->len || !block_capacity(blk, len))
285 return false;
286 if (blk->len == pos)
287 return block_append(blk, data, len);
288 char *insert = blk->data + pos;
289 memmove(insert + len, insert, blk->len - pos);
290 memcpy(insert, data, len);
291 blk->len += len;
292 return true;
295 /* delete data from a block at an arbitrary position, this should only be used with
296 * data of the most recently created piece. */
297 static bool block_delete(Block *blk, size_t pos, size_t len) {
298 size_t end;
299 if (!addu(pos, len, &end) || end > blk->len)
300 return false;
301 if (blk->len == pos) {
302 blk->len -= len;
303 return true;
305 char *delete = blk->data + pos;
306 memmove(delete, delete + len, blk->len - pos - len);
307 blk->len -= len;
308 return true;
311 /* cache the given piece if it is the most recently changed one */
312 static void cache_piece(Text *txt, Piece *p) {
313 Block *blk = txt->blocks;
314 if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
315 return;
316 txt->cache = p;
319 /* check whether the given piece was the most recently modified one */
320 static bool cache_contains(Text *txt, Piece *p) {
321 Block *blk = txt->blocks;
322 Revision *rev = txt->current_revision;
323 if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
324 return false;
326 Piece *start = rev->change->new.start;
327 Piece *end = rev->change->new.end;
328 bool found = false;
329 for (Piece *cur = start; !found; cur = cur->next) {
330 if (cur == p)
331 found = true;
332 if (cur == end)
333 break;
336 return found && p->data + p->len == blk->data + blk->len;
339 /* try to insert a chunk of data at a given piece offset. the insertion is only
340 * performed if the piece is the most recenetly changed one. the legnth of the
341 * piece, the span containing it and the whole text is adjusted accordingly */
342 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
343 if (!cache_contains(txt, p))
344 return false;
345 Block *blk = txt->blocks;
346 size_t bufpos = p->data + off - blk->data;
347 if (!block_insert(blk, bufpos, data, len))
348 return false;
349 p->len += len;
350 txt->current_revision->change->new.len += len;
351 txt->size += len;
352 return true;
355 /* try to delete a chunk of data at a given piece offset. the deletion is only
356 * performed if the piece is the most recenetly changed one and the whole
357 * affected range lies within it. the legnth of the piece, the span containing it
358 * and the whole text is adjusted accordingly */
359 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
360 if (!cache_contains(txt, p))
361 return false;
362 Block *blk = txt->blocks;
363 size_t end;
364 size_t bufpos = p->data + off - blk->data;
365 if (!addu(off, len, &end) || end > p->len || !block_delete(blk, bufpos, len))
366 return false;
367 p->len -= len;
368 txt->current_revision->change->new.len -= len;
369 txt->size -= len;
370 return true;
373 /* initialize a span and calculate its length */
374 static void span_init(Span *span, Piece *start, Piece *end) {
375 size_t len = 0;
376 span->start = start;
377 span->end = end;
378 for (Piece *p = start; p; p = p->next) {
379 len += p->len;
380 if (p == end)
381 break;
383 span->len = len;
386 /* swap out an old span and replace it with a new one.
388 * - if old is an empty span do not remove anything, just insert the new one
389 * - if new is an empty span do not insert anything, just remove the old one
391 * adjusts the document size accordingly.
393 static void span_swap(Text *txt, Span *old, Span *new) {
394 if (old->len == 0 && new->len == 0) {
395 return;
396 } else if (old->len == 0) {
397 /* insert new span */
398 new->start->prev->next = new->start;
399 new->end->next->prev = new->end;
400 } else if (new->len == 0) {
401 /* delete old span */
402 old->start->prev->next = old->end->next;
403 old->end->next->prev = old->start->prev;
404 } else {
405 /* replace old with new */
406 old->start->prev->next = new->start;
407 old->end->next->prev = new->end;
409 txt->size -= old->len;
410 txt->size += new->len;
413 /* Allocate a new revision and place it in the revision graph.
414 * All further changes will be associated with this revision. */
415 static Revision *revision_alloc(Text *txt) {
416 Revision *rev = calloc(1, sizeof *rev);
417 if (!rev)
418 return NULL;
419 rev->time = time(NULL);
420 txt->current_revision = rev;
422 /* set sequence number */
423 if (!txt->last_revision)
424 rev->seq = 0;
425 else
426 rev->seq = txt->last_revision->seq + 1;
428 /* set earlier, later pointers */
429 if (txt->last_revision)
430 txt->last_revision->later = rev;
431 rev->earlier = txt->last_revision;
433 if (!txt->history) {
434 txt->history = rev;
435 return rev;
438 /* set prev, next pointers */
439 rev->prev = txt->history;
440 txt->history->next = rev;
441 txt->history = rev;
442 return rev;
445 static void revision_free(Revision *rev) {
446 if (!rev)
447 return;
448 for (Change *next, *c = rev->change; c; c = next) {
449 next = c->next;
450 change_free(c);
452 free(rev);
455 static Piece *piece_alloc(Text *txt) {
456 Piece *p = calloc(1, sizeof *p);
457 if (!p)
458 return NULL;
459 p->text = txt;
460 p->global_next = txt->pieces;
461 if (txt->pieces)
462 txt->pieces->global_prev = p;
463 txt->pieces = p;
464 return p;
467 static void piece_free(Piece *p) {
468 if (!p)
469 return;
470 if (p->global_prev)
471 p->global_prev->global_next = p->global_next;
472 if (p->global_next)
473 p->global_next->global_prev = p->global_prev;
474 if (p->text->pieces == p)
475 p->text->pieces = p->global_next;
476 if (p->text->cache == p)
477 p->text->cache = NULL;
478 free(p);
481 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
482 p->prev = prev;
483 p->next = next;
484 p->data = data;
485 p->len = len;
488 /* returns the piece holding the text at byte offset pos. if pos happens to
489 * be at a piece boundry i.e. the first byte of a piece then the previous piece
490 * to the left is returned with an offset of piece->len. this is convenient for
491 * modifications to the piece chain where both pieces (the returned one and the
492 * one following it) are needed, but unsuitable as a public interface.
494 * in particular if pos is zero, the begin sentinel piece is returned.
496 static Location piece_get_intern(Text *txt, size_t pos) {
497 size_t cur = 0;
498 for (Piece *p = &txt->begin; p->next; p = p->next) {
499 if (cur <= pos && pos <= cur + p->len)
500 return (Location){ .piece = p, .off = pos - cur };
501 cur += p->len;
504 return (Location){ 0 };
507 /* similiar to piece_get_intern but usable as a public API. returns the piece
508 * holding the text at byte offset pos. never returns a sentinel piece.
509 * it pos is the end of file (== text_size()) and the file is not empty then
510 * the last piece holding data is returned.
512 static Location piece_get_extern(Text *txt, size_t pos) {
513 size_t cur = 0;
514 Piece *p;
516 for (p = txt->begin.next; p->next; p = p->next) {
517 if (cur <= pos && pos < cur + p->len)
518 return (Location){ .piece = p, .off = pos - cur };
519 cur += p->len;
522 if (cur == pos)
523 return (Location){ .piece = p->prev, .off = p->prev->len };
525 return (Location){ 0 };
528 /* allocate a new change, associate it with current revision or a newly
529 * allocated one if none exists. */
530 static Change *change_alloc(Text *txt, size_t pos) {
531 Revision *rev = txt->current_revision;
532 if (!rev) {
533 rev = revision_alloc(txt);
534 if (!rev)
535 return NULL;
537 Change *c = calloc(1, sizeof *c);
538 if (!c)
539 return NULL;
540 c->pos = pos;
541 c->next = rev->change;
542 if (rev->change)
543 rev->change->prev = c;
544 rev->change = c;
545 return c;
548 static void change_free(Change *c) {
549 if (!c)
550 return;
551 /* only free the new part of the span, the old one is still in use */
552 if (c->new.start != c->new.end)
553 piece_free(c->new.end);
554 piece_free(c->new.start);
555 free(c);
558 /* When inserting new data there are 2 cases to consider.
560 * - in the first the insertion point falls into the middle of an exisiting
561 * piece which is replaced by three new pieces:
563 * /-+ --> +---------------+ --> +-\
564 * | | | existing text | | |
565 * \-+ <-- +---------------+ <-- +-/
567 * Insertion point for "demo "
569 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
570 * | | | existing| |demo | |text | | |
571 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
573 * - the second case deals with an insertion point at a piece boundry:
575 * /-+ --> +---------------+ --> +-\
576 * | | | existing text | | |
577 * \-+ <-- +---------------+ <-- +-/
579 * Insertion point for "short"
581 * /-+ --> +-----+ --> +---------------+ --> +-\
582 * | | |short| | existing text | | |
583 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
585 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
586 if (len == 0)
587 return true;
588 if (pos > txt->size)
589 return false;
590 if (pos < txt->lines.pos)
591 lineno_cache_invalidate(&txt->lines);
593 Location loc = piece_get_intern(txt, pos);
594 Piece *p = loc.piece;
595 if (!p)
596 return false;
597 size_t off = loc.off;
598 if (cache_insert(txt, p, off, data, len))
599 return true;
601 Change *c = change_alloc(txt, pos);
602 if (!c)
603 return false;
605 if (!(data = block_store(txt, data, len)))
606 return false;
608 Piece *new = NULL;
610 if (off == p->len) {
611 /* insert between two existing pieces, hence there is nothing to
612 * remove, just add a new piece holding the extra text */
613 if (!(new = piece_alloc(txt)))
614 return false;
615 piece_init(new, p, p->next, data, len);
616 span_init(&c->new, new, new);
617 span_init(&c->old, NULL, NULL);
618 } else {
619 /* insert into middle of an existing piece, therfore split the old
620 * piece. that is we have 3 new pieces one containing the content
621 * before the insertion point then one holding the newly inserted
622 * text and one holding the content after the insertion point.
624 Piece *before = piece_alloc(txt);
625 new = piece_alloc(txt);
626 Piece *after = piece_alloc(txt);
627 if (!before || !new || !after)
628 return false;
629 piece_init(before, p->prev, new, p->data, off);
630 piece_init(new, before, after, data, len);
631 piece_init(after, new, p->next, p->data + off, p->len - off);
633 span_init(&c->new, before, after);
634 span_init(&c->old, p, p);
637 cache_piece(txt, new);
638 span_swap(txt, &c->old, &c->new);
639 return true;
642 static bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
643 va_list ap_save;
644 va_copy(ap_save, ap);
645 int len = vsnprintf(NULL, 0, format, ap);
646 if (len == -1) {
647 va_end(ap_save);
648 return false;
650 char *buf = malloc(len+1);
651 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
652 free(buf);
653 va_end(ap_save);
654 return ret;
657 bool text_appendf(Text *txt, const char *format, ...) {
658 va_list ap;
659 va_start(ap, format);
660 bool ret = text_vprintf(txt, text_size(txt), format, ap);
661 va_end(ap);
662 return ret;
665 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
666 va_list ap;
667 va_start(ap, format);
668 bool ret = text_vprintf(txt, pos, format, ap);
669 va_end(ap);
670 return ret;
673 static size_t revision_undo(Text *txt, Revision *rev) {
674 size_t pos = EPOS;
675 for (Change *c = rev->change; c; c = c->next) {
676 span_swap(txt, &c->new, &c->old);
677 pos = c->pos;
679 return pos;
682 static size_t revision_redo(Text *txt, Revision *rev) {
683 size_t pos = EPOS;
684 Change *c = rev->change;
685 while (c->next)
686 c = c->next;
687 for ( ; c; c = c->prev) {
688 span_swap(txt, &c->old, &c->new);
689 pos = c->pos;
690 if (c->new.len > c->old.len)
691 pos += c->new.len - c->old.len;
693 return pos;
696 size_t text_undo(Text *txt) {
697 size_t pos = EPOS;
698 /* taking rev snapshot makes sure that txt->current_revision is reset */
699 text_snapshot(txt);
700 Revision *rev = txt->history->prev;
701 if (!rev)
702 return pos;
703 pos = revision_undo(txt, txt->history);
704 txt->history = rev;
705 lineno_cache_invalidate(&txt->lines);
706 return pos;
709 size_t text_redo(Text *txt) {
710 size_t pos = EPOS;
711 /* taking a snapshot makes sure that txt->current_revision is reset */
712 text_snapshot(txt);
713 Revision *rev = txt->history->next;
714 if (!rev)
715 return pos;
716 pos = revision_redo(txt, rev);
717 txt->history = rev;
718 lineno_cache_invalidate(&txt->lines);
719 return pos;
722 static bool history_change_branch(Revision *rev) {
723 bool changed = false;
724 while (rev->prev) {
725 if (rev->prev->next != rev) {
726 rev->prev->next = rev;
727 changed = true;
729 rev = rev->prev;
731 return changed;
734 static size_t history_traverse_to(Text *txt, Revision *rev) {
735 size_t pos = EPOS;
736 if (!rev)
737 return pos;
738 bool changed = history_change_branch(rev);
739 if (!changed) {
740 if (rev->seq == txt->history->seq) {
741 return txt->lines.pos;
742 } else if (rev->seq > txt->history->seq) {
743 while (txt->history != rev)
744 pos = text_redo(txt);
745 return pos;
746 } else if (rev->seq < txt->history->seq) {
747 while (txt->history != rev)
748 pos = text_undo(txt);
749 return pos;
751 } else {
752 while (txt->history->prev && txt->history->prev->next == txt->history)
753 text_undo(txt);
754 pos = text_undo(txt);
755 while (txt->history != rev)
756 pos = text_redo(txt);
757 return pos;
759 return pos;
762 size_t text_earlier(Text *txt) {
763 return history_traverse_to(txt, txt->history->earlier);
766 size_t text_later(Text *txt) {
767 return history_traverse_to(txt, txt->history->later);
770 size_t text_restore(Text *txt, time_t time) {
771 Revision *rev = txt->history;
772 while (time < rev->time && rev->earlier)
773 rev = rev->earlier;
774 while (time > rev->time && rev->later)
775 rev = rev->later;
776 time_t diff = labs(rev->time - time);
777 if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
778 rev = rev->earlier;
779 if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
780 rev = rev->later;
781 return history_traverse_to(txt, rev);
784 time_t text_state(Text *txt) {
785 return txt->history->time;
788 static bool preserve_acl(int src, int dest) {
789 #if CONFIG_ACL
790 acl_t acl = acl_get_fd(src);
791 if (!acl)
792 return errno == ENOTSUP ? true : false;
793 if (acl_set_fd(dest, acl) == -1) {
794 acl_free(acl);
795 return false;
797 acl_free(acl);
798 #endif /* CONFIG_ACL */
799 return true;
802 static bool preserve_selinux_context(int src, int dest) {
803 #if CONFIG_SELINUX
804 char *context = NULL;
805 if (!is_selinux_enabled())
806 return true;
807 if (fgetfilecon(src, &context) == -1)
808 return errno == ENOTSUP ? true : false;
809 if (fsetfilecon(dest, context) == -1) {
810 freecon(context);
811 return false;
813 freecon(context);
814 #endif /* CONFIG_SELINUX */
815 return true;
818 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
819 * randomly generated, unique suffix) and try to preserve all important
820 * meta data. After the file content has been written to this temporary
821 * file, text_save_commit_atomic will atomically move it to its final
822 * (possibly already existing) destination using rename(2).
824 * This approach does not work if:
826 * - the file is a symbolic link
827 * - the file is a hard link
828 * - file ownership can not be preserved
829 * - file group can not be preserved
830 * - directory permissions do not allow creation of a new file
831 * - POSXI ACL can not be preserved (if enabled)
832 * - SELinux security context can not be preserved (if enabled)
834 static bool text_save_begin_atomic(TextSave *ctx) {
835 int oldfd, saved_errno;
836 if ((oldfd = open(ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
837 goto err;
838 struct stat oldmeta = { 0 };
839 if (oldfd != -1 && lstat(ctx->filename, &oldmeta) == -1)
840 goto err;
841 if (oldfd != -1) {
842 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
843 goto err;
844 if (oldmeta.st_nlink > 1) /* hard link */
845 goto err;
848 char suffix[] = ".vis.XXXXXX";
849 size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
850 char *dir = strdup(ctx->filename);
851 char *base = strdup(ctx->filename);
853 if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
854 free(dir);
855 free(base);
856 goto err;
859 snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
860 free(dir);
861 free(base);
863 if ((ctx->fd = mkstemp(ctx->tmpname)) == -1)
864 goto err;
866 if (oldfd == -1) {
867 mode_t mask = umask(0);
868 umask(mask);
869 if (fchmod(ctx->fd, 0666 & ~mask) == -1)
870 goto err;
871 } else {
872 if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
873 goto err;
874 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
875 goto err;
876 /* change owner if necessary */
877 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
878 goto err;
879 /* change group if necessary, in case of failure some editors reset
880 * the group permissions to the same as for others */
881 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
882 goto err;
883 close(oldfd);
886 ctx->type = TEXT_SAVE_ATOMIC;
887 return true;
888 err:
889 saved_errno = errno;
890 if (oldfd != -1)
891 close(oldfd);
892 if (ctx->fd != -1)
893 close(ctx->fd);
894 ctx->fd = -1;
895 free(ctx->tmpname);
896 ctx->tmpname = NULL;
897 errno = saved_errno;
898 return false;
901 static bool text_save_commit_atomic(TextSave *ctx) {
902 if (fsync(ctx->fd) == -1)
903 return false;
905 struct stat meta = { 0 };
906 if (fstat(ctx->fd, &meta) == -1)
907 return false;
909 bool close_failed = (close(ctx->fd) == -1);
910 ctx->fd = -1;
911 if (close_failed)
912 return false;
914 if (rename(ctx->tmpname, ctx->filename) == -1)
915 return false;
917 free(ctx->tmpname);
918 ctx->tmpname = NULL;
920 int dir = open(dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
921 if (dir == -1)
922 return false;
924 if (fsync(dir) == -1 && errno != EINVAL) {
925 close(dir);
926 return false;
929 if (close(dir) == -1)
930 return false;
932 if (meta.st_mtime)
933 ctx->txt->info = meta;
934 return true;
937 static bool text_save_begin_inplace(TextSave *ctx) {
938 Text *txt = ctx->txt;
939 struct stat meta = { 0 };
940 int newfd = -1, saved_errno;
941 if ((ctx->fd = open(ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
942 goto err;
943 if (fstat(ctx->fd, &meta) == -1)
944 goto err;
945 Block *block = txt->block;
946 if (meta.st_dev == txt->info.st_dev && meta.st_ino == txt->info.st_ino &&
947 block && block->type == MMAP_ORIG && block->size) {
948 /* The file we are going to overwrite is currently mmap-ed from
949 * text_load, therefore we copy the mmap-ed block to a temporary
950 * file and remap it at the same position such that all pointers
951 * from the various pieces are still valid.
953 size_t size = block->size;
954 char tmpname[32] = "/tmp/vis-XXXXXX";
955 newfd = mkstemp(tmpname);
956 if (newfd == -1)
957 goto err;
958 if (unlink(tmpname) == -1)
959 goto err;
960 ssize_t written = write_all(newfd, block->data, size);
961 if (written == -1 || (size_t)written != size)
962 goto err;
963 void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
964 if (data == MAP_FAILED)
965 goto err;
966 bool close_failed = (close(newfd) == -1);
967 newfd = -1;
968 if (close_failed)
969 goto err;
970 block->type = MMAP;
972 /* overwrite the existing file content, if something goes wrong
973 * here we are screwed, TODO: make a backup before? */
974 if (ftruncate(ctx->fd, 0) == -1)
975 goto err;
976 ctx->type = TEXT_SAVE_INPLACE;
977 return true;
978 err:
979 saved_errno = errno;
980 if (newfd != -1)
981 close(newfd);
982 if (ctx->fd != -1)
983 close(ctx->fd);
984 ctx->fd = -1;
985 errno = saved_errno;
986 return false;
989 static bool text_save_commit_inplace(TextSave *ctx) {
990 if (fsync(ctx->fd) == -1)
991 return false;
992 struct stat meta = { 0 };
993 if (fstat(ctx->fd, &meta) == -1)
994 return false;
995 if (close(ctx->fd) == -1)
996 return false;
997 ctx->txt->info = meta;
998 return true;
1001 TextSave *text_save_begin(Text *txt, const char *filename, enum TextSaveMethod type) {
1002 if (!filename)
1003 return NULL;
1004 TextSave *ctx = calloc(1, sizeof *ctx);
1005 if (!ctx)
1006 return NULL;
1007 ctx->txt = txt;
1008 ctx->fd = -1;
1009 if (!(ctx->filename = strdup(filename)))
1010 goto err;
1011 errno = 0;
1012 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
1013 return ctx;
1014 if (errno == ENOSPC)
1015 goto err;
1016 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
1017 return ctx;
1018 err:
1019 text_save_cancel(ctx);
1020 return NULL;
1023 bool text_save_commit(TextSave *ctx) {
1024 if (!ctx)
1025 return true;
1026 bool ret;
1027 Text *txt = ctx->txt;
1028 switch (ctx->type) {
1029 case TEXT_SAVE_ATOMIC:
1030 ret = text_save_commit_atomic(ctx);
1031 break;
1032 case TEXT_SAVE_INPLACE:
1033 ret = text_save_commit_inplace(ctx);
1034 break;
1035 default:
1036 ret = false;
1037 break;
1040 if (ret) {
1041 txt->saved_revision = txt->history;
1042 text_snapshot(txt);
1044 text_save_cancel(ctx);
1045 return ret;
1048 void text_save_cancel(TextSave *ctx) {
1049 if (!ctx)
1050 return;
1051 int saved_errno = errno;
1052 if (ctx->fd != -1)
1053 close(ctx->fd);
1054 if (ctx->tmpname && ctx->tmpname[0])
1055 unlink(ctx->tmpname);
1056 free(ctx->tmpname);
1057 free(ctx->filename);
1058 free(ctx);
1059 errno = saved_errno;
1062 /* First try to save the file atomically using rename(2) if this does not
1063 * work overwrite the file in place. However if something goes wrong during
1064 * this overwrite the original file is permanently damaged.
1066 bool text_save(Text *txt, const char *filename) {
1067 return text_save_method(txt, filename, TEXT_SAVE_AUTO);
1070 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
1071 if (!filename) {
1072 txt->saved_revision = txt->history;
1073 text_snapshot(txt);
1074 return true;
1076 TextSave *ctx = text_save_begin(txt, filename, method);
1077 if (!ctx)
1078 return false;
1079 Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
1080 ssize_t written = text_save_write_range(ctx, &range);
1081 if (written == -1 || (size_t)written != text_range_size(&range)) {
1082 text_save_cancel(ctx);
1083 return false;
1085 return text_save_commit(ctx);
1088 ssize_t text_save_write_range(TextSave *ctx, Filerange *range) {
1089 return text_write_range(ctx->txt, range, ctx->fd);
1092 ssize_t text_write(Text *txt, int fd) {
1093 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1094 return text_write_range(txt, &r, fd);
1097 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1098 size_t size = text_range_size(range), rem = size;
1099 for (Iterator it = text_iterator_get(txt, range->start);
1100 rem > 0 && text_iterator_valid(&it);
1101 text_iterator_next(&it)) {
1102 size_t prem = it.end - it.text;
1103 if (prem > rem)
1104 prem = rem;
1105 ssize_t written = write_all(fd, it.text, prem);
1106 if (written == -1)
1107 return -1;
1108 rem -= written;
1109 if ((size_t)written != prem)
1110 break;
1112 return size - rem;
1115 Text *text_load(const char *filename) {
1116 return text_load_method(filename, TEXT_LOAD_AUTO);
1119 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
1120 int fd = -1;
1121 size_t size = 0;
1122 Text *txt = calloc(1, sizeof *txt);
1123 if (!txt)
1124 return NULL;
1125 Piece *p = piece_alloc(txt);
1126 if (!p)
1127 goto out;
1128 lineno_cache_invalidate(&txt->lines);
1129 if (filename) {
1130 if ((fd = open(filename, O_RDONLY)) == -1)
1131 goto out;
1132 if (fstat(fd, &txt->info) == -1)
1133 goto out;
1134 if (!S_ISREG(txt->info.st_mode)) {
1135 errno = S_ISDIR(txt->info.st_mode) ? EISDIR : ENOTSUP;
1136 goto out;
1138 // XXX: use lseek(fd, 0, SEEK_END); instead?
1139 size = txt->info.st_size;
1140 if (size > 0) {
1141 if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
1142 txt->block = block_read(txt, size, fd);
1143 else
1144 txt->block = block_mmap(txt, size, fd, 0);
1145 if (!txt->block)
1146 goto out;
1147 piece_init(p, &txt->begin, &txt->end, txt->block->data, txt->block->len);
1151 if (size == 0)
1152 piece_init(p, &txt->begin, &txt->end, "\0", 0);
1154 piece_init(&txt->begin, NULL, p, NULL, 0);
1155 piece_init(&txt->end, p, NULL, NULL, 0);
1156 txt->size = p->len;
1157 /* write an empty revision */
1158 change_alloc(txt, EPOS);
1159 text_snapshot(txt);
1160 txt->saved_revision = txt->history;
1162 if (fd != -1)
1163 close(fd);
1164 return txt;
1165 out:
1166 if (fd != -1)
1167 close(fd);
1168 text_free(txt);
1169 return NULL;
1172 struct stat text_stat(Text *txt) {
1173 return txt->info;
1176 /* A delete operation can either start/stop midway through a piece or at
1177 * a boundry. In the former case a new piece is created to represent the
1178 * remaining text before/after the modification point.
1180 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1181 * | | | existing| |demo | |text | | |
1182 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1183 * ^ ^
1184 * |------ delete range -----|
1186 * /-+ --> +----+ --> +--+ --> +-\
1187 * | | | exi| |t | | |
1188 * \-+ <-- +----+ <-- +--+ <-- +-/
1190 bool text_delete(Text *txt, size_t pos, size_t len) {
1191 if (len == 0)
1192 return true;
1193 size_t pos_end;
1194 if (!addu(pos, len, &pos_end) || pos_end > txt->size)
1195 return false;
1196 if (pos < txt->lines.pos)
1197 lineno_cache_invalidate(&txt->lines);
1199 Location loc = piece_get_intern(txt, pos);
1200 Piece *p = loc.piece;
1201 if (!p)
1202 return false;
1203 size_t off = loc.off;
1204 if (cache_delete(txt, p, off, len))
1205 return true;
1206 Change *c = change_alloc(txt, pos);
1207 if (!c)
1208 return false;
1210 bool midway_start = false, midway_end = false; /* split pieces? */
1211 Piece *before, *after; /* unmodified pieces before/after deletion point */
1212 Piece *start, *end; /* span which is removed */
1213 size_t cur; /* how much has already been deleted */
1215 if (off == p->len) {
1216 /* deletion starts at a piece boundry */
1217 cur = 0;
1218 before = p;
1219 start = p->next;
1220 } else {
1221 /* deletion starts midway through a piece */
1222 midway_start = true;
1223 cur = p->len - off;
1224 start = p;
1225 before = piece_alloc(txt);
1226 if (!before)
1227 return false;
1230 /* skip all pieces which fall into deletion range */
1231 while (cur < len) {
1232 p = p->next;
1233 cur += p->len;
1236 if (cur == len) {
1237 /* deletion stops at a piece boundry */
1238 end = p;
1239 after = p->next;
1240 } else {
1241 /* cur > len: deletion stops midway through a piece */
1242 midway_end = true;
1243 end = p;
1244 after = piece_alloc(txt);
1245 if (!after)
1246 return false;
1247 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1250 if (midway_start) {
1251 /* we finally know which piece follows our newly allocated before piece */
1252 piece_init(before, start->prev, after, start->data, off);
1255 Piece *new_start = NULL, *new_end = NULL;
1256 if (midway_start) {
1257 new_start = before;
1258 if (!midway_end)
1259 new_end = before;
1261 if (midway_end) {
1262 if (!midway_start)
1263 new_start = after;
1264 new_end = after;
1267 span_init(&c->new, new_start, new_end);
1268 span_init(&c->old, start, end);
1269 span_swap(txt, &c->old, &c->new);
1270 return true;
1273 bool text_delete_range(Text *txt, Filerange *r) {
1274 if (!text_range_valid(r))
1275 return false;
1276 return text_delete(txt, r->start, text_range_size(r));
1279 /* preserve the current text content such that it can be restored by
1280 * means of undo/redo operations */
1281 void text_snapshot(Text *txt) {
1282 if (txt->current_revision)
1283 txt->last_revision = txt->current_revision;
1284 txt->current_revision = NULL;
1285 txt->cache = NULL;
1289 void text_free(Text *txt) {
1290 if (!txt)
1291 return;
1293 // free history
1294 Revision *hist = txt->history;
1295 while (hist && hist->prev)
1296 hist = hist->prev;
1297 while (hist) {
1298 Revision *later = hist->later;
1299 revision_free(hist);
1300 hist = later;
1303 for (Piece *next, *p = txt->pieces; p; p = next) {
1304 next = p->global_next;
1305 piece_free(p);
1308 for (Block *next, *blk = txt->blocks; blk; blk = next) {
1309 next = blk->next;
1310 block_free(blk);
1313 free(txt);
1316 bool text_modified(Text *txt) {
1317 return txt->saved_revision != txt->history;
1320 bool text_mmaped(Text *txt, const char *ptr) {
1321 uintptr_t addr = (uintptr_t)ptr;
1322 for (Block *blk = txt->blocks; blk; blk = blk->next) {
1323 if ((blk->type == MMAP_ORIG || blk->type == MMAP) &&
1324 (uintptr_t)(blk->data) <= addr && addr < (uintptr_t)(blk->data + blk->size))
1325 return true;
1327 return false;
1330 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1331 Iterator iter = (Iterator){
1332 .pos = pos,
1333 .piece = p,
1334 .start = p ? p->data : NULL,
1335 .end = p ? p->data + p->len : NULL,
1336 .text = p ? p->data + off : NULL,
1338 *it = iter;
1339 return text_iterator_valid(it);
1342 Iterator text_iterator_get(Text *txt, size_t pos) {
1343 Iterator it;
1344 Location loc = piece_get_extern(txt, pos);
1345 text_iterator_init(&it, pos, loc.piece, loc.off);
1346 return it;
1349 bool text_iterator_byte_get(Iterator *it, char *b) {
1350 if (text_iterator_valid(it)) {
1351 if (it->start <= it->text && it->text < it->end) {
1352 *b = *it->text;
1353 return true;
1354 } else if (it->pos == it->piece->text->size) { /* EOF */
1355 *b = '\0';
1356 return true;
1359 return false;
1362 bool text_iterator_next(Iterator *it) {
1363 size_t rem = it->end - it->text;
1364 return text_iterator_init(it, it->pos+rem, it->piece ? it->piece->next : NULL, 0);
1367 bool text_iterator_prev(Iterator *it) {
1368 size_t off = it->text - it->start;
1369 size_t len = it->piece && it->piece->prev ? it->piece->prev->len : 0;
1370 return text_iterator_init(it, it->pos-off, it->piece ? it->piece->prev : NULL, len);
1373 bool text_iterator_valid(const Iterator *it) {
1374 /* filter out sentinel nodes */
1375 return it->piece && it->piece->text;
1378 bool text_iterator_byte_next(Iterator *it, char *b) {
1379 if (!it->piece || !it->piece->next)
1380 return false;
1381 bool eof = true;
1382 if (it->text < it->end) {
1383 it->text++;
1384 it->pos++;
1385 eof = false;
1386 } else if (!it->piece->prev) {
1387 eof = false;
1390 while (it->text == it->end) {
1391 if (!text_iterator_next(it)) {
1392 if (eof)
1393 return false;
1394 if (b)
1395 *b = '\0';
1396 return text_iterator_prev(it);
1400 if (b)
1401 *b = *it->text;
1402 return true;
1405 bool text_iterator_byte_prev(Iterator *it, char *b) {
1406 if (!it->piece || !it->piece->prev)
1407 return false;
1408 bool eof = !it->piece->next;
1409 while (it->text == it->start) {
1410 if (!text_iterator_prev(it)) {
1411 if (!eof)
1412 return false;
1413 if (b)
1414 *b = '\0';
1415 return text_iterator_next(it);
1419 --it->text;
1420 --it->pos;
1422 if (b)
1423 *b = *it->text;
1424 return true;
1427 bool text_iterator_byte_find_prev(Iterator *it, char b) {
1428 while (it->text) {
1429 const char *match = memrchr(it->start, b, it->text - it->start);
1430 if (match) {
1431 it->pos -= it->text - match;
1432 it->text = match;
1433 return true;
1435 text_iterator_prev(it);
1437 text_iterator_next(it);
1438 return false;
1441 bool text_iterator_byte_find_next(Iterator *it, char b) {
1442 while (it->text) {
1443 const char *match = memchr(it->text, b, it->end - it->text);
1444 if (match) {
1445 it->pos += match - it->text;
1446 it->text = match;
1447 return true;
1449 text_iterator_next(it);
1451 text_iterator_prev(it);
1452 return false;
1455 bool text_iterator_codepoint_next(Iterator *it, char *c) {
1456 while (text_iterator_byte_next(it, NULL)) {
1457 if (ISUTF8(*it->text)) {
1458 if (c)
1459 *c = *it->text;
1460 return true;
1463 return false;
1466 bool text_iterator_codepoint_prev(Iterator *it, char *c) {
1467 while (text_iterator_byte_prev(it, NULL)) {
1468 if (ISUTF8(*it->text)) {
1469 if (c)
1470 *c = *it->text;
1471 return true;
1474 return false;
1477 bool text_iterator_char_next(Iterator *it, char *c) {
1478 if (!text_iterator_codepoint_next(it, c))
1479 return false;
1480 mbstate_t ps = { 0 };
1481 for (;;) {
1482 char buf[MB_LEN_MAX];
1483 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1484 wchar_t wc;
1485 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1486 if (wclen == (size_t)-1 && errno == EILSEQ) {
1487 return true;
1488 } else if (wclen == (size_t)-2) {
1489 return false;
1490 } else if (wclen == 0) {
1491 return true;
1492 } else {
1493 int width = wcwidth(wc);
1494 if (width != 0)
1495 return true;
1496 if (!text_iterator_codepoint_next(it, c))
1497 return false;
1500 return true;
1503 bool text_iterator_char_prev(Iterator *it, char *c) {
1504 if (!text_iterator_codepoint_prev(it, c))
1505 return false;
1506 for (;;) {
1507 char buf[MB_LEN_MAX];
1508 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1509 wchar_t wc;
1510 mbstate_t ps = { 0 };
1511 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1512 if (wclen == (size_t)-1 && errno == EILSEQ) {
1513 return true;
1514 } else if (wclen == (size_t)-2) {
1515 return false;
1516 } else if (wclen == 0) {
1517 return true;
1518 } else {
1519 int width = wcwidth(wc);
1520 if (width != 0)
1521 return true;
1522 if (!text_iterator_codepoint_prev(it, c))
1523 return false;
1526 return true;
1529 bool text_byte_get(Text *txt, size_t pos, char *byte) {
1530 return text_bytes_get(txt, pos, 1, byte);
1533 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1534 if (!buf)
1535 return 0;
1536 char *cur = buf;
1537 size_t rem = len;
1538 for (Iterator it = text_iterator_get(txt, pos);
1539 text_iterator_valid(&it);
1540 text_iterator_next(&it)) {
1541 if (rem == 0)
1542 break;
1543 size_t piece_len = it.end - it.text;
1544 if (piece_len > rem)
1545 piece_len = rem;
1546 if (piece_len) {
1547 memcpy(cur, it.text, piece_len);
1548 cur += piece_len;
1549 rem -= piece_len;
1552 return len - rem;
1555 char *text_bytes_alloc0(Text *txt, size_t pos, size_t len) {
1556 if (len == SIZE_MAX)
1557 return NULL;
1558 char *buf = malloc(len+1);
1559 if (!buf)
1560 return NULL;
1561 len = text_bytes_get(txt, pos, len, buf);
1562 buf[len] = '\0';
1563 return buf;
1566 size_t text_size(Text *txt) {
1567 return txt->size;
1570 /* count the number of new lines '\n' in range [pos, pos+len) */
1571 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1572 size_t lines = 0;
1573 for (Iterator it = text_iterator_get(txt, pos);
1574 text_iterator_valid(&it);
1575 text_iterator_next(&it)) {
1576 const char *start = it.text;
1577 while (len > 0 && start < it.end) {
1578 size_t n = MIN(len, (size_t)(it.end - start));
1579 const char *end = memchr(start, '\n', n);
1580 if (!end) {
1581 len -= n;
1582 break;
1584 lines++;
1585 len -= end - start + 1;
1586 start = end + 1;
1589 if (len == 0)
1590 break;
1592 return lines;
1595 /* skip n lines forward and return position afterwards */
1596 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1597 size_t lines_old = lines;
1598 for (Iterator it = text_iterator_get(txt, pos);
1599 text_iterator_valid(&it);
1600 text_iterator_next(&it)) {
1601 const char *start = it.text;
1602 while (lines > 0 && start < it.end) {
1603 size_t n = it.end - start;
1604 const char *end = memchr(start, '\n', n);
1605 if (!end) {
1606 pos += n;
1607 break;
1609 pos += end - start + 1;
1610 start = end + 1;
1611 lines--;
1614 if (lines == 0)
1615 break;
1617 if (lines_skipped)
1618 *lines_skipped = lines_old - lines;
1619 return pos;
1622 static void lineno_cache_invalidate(LineCache *cache) {
1623 cache->pos = 0;
1624 cache->lineno = 1;
1627 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1628 size_t lines_skipped;
1629 LineCache *cache = &txt->lines;
1630 if (lineno <= 1)
1631 return 0;
1632 if (lineno > cache->lineno) {
1633 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1634 cache->lineno += lines_skipped;
1635 } else if (lineno < cache->lineno) {
1636 #if 0
1637 // TODO does it make sense to scan memory backwards here?
1638 size_t diff = cache->lineno - lineno;
1639 if (diff < lineno) {
1640 lines_skip_backward(txt, cache->pos, diff);
1641 } else
1642 #endif
1643 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1644 cache->lineno = lines_skipped + 1;
1646 return cache->lineno == lineno ? cache->pos : EPOS;
1649 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1650 LineCache *cache = &txt->lines;
1651 if (pos > txt->size)
1652 pos = txt->size;
1653 if (pos < cache->pos) {
1654 size_t diff = cache->pos - pos;
1655 if (diff < pos)
1656 cache->lineno -= lines_count(txt, pos, diff);
1657 else
1658 cache->lineno = lines_count(txt, 0, pos) + 1;
1659 } else if (pos > cache->pos) {
1660 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1662 cache->pos = text_line_begin(txt, pos);
1663 return cache->lineno;
1666 Mark text_mark_set(Text *txt, size_t pos) {
1667 if (pos == txt->size)
1668 return (Mark)&txt->end;
1669 Location loc = piece_get_extern(txt, pos);
1670 if (!loc.piece)
1671 return EMARK;
1672 return (Mark)(loc.piece->data + loc.off);
1675 size_t text_mark_get(Text *txt, Mark mark) {
1676 size_t cur = 0;
1678 if (mark == EMARK)
1679 return EPOS;
1680 if (mark == (Mark)&txt->end)
1681 return txt->size;
1683 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1684 Mark start = (Mark)(p->data);
1685 Mark end = start + p->len;
1686 if (start <= mark && mark < end)
1687 return cur + (mark - start);
1688 cur += p->len;
1691 return EPOS;