2003-12-26 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / boehm-gc / cord / cordxtra.c
bloba5be10de58aed4ccd249f46ac036f0793d75d698
1 /*
2 * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
4 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
7 * Permission is hereby granted to use or copy this program
8 * for any purpose, provided the above notices are retained on all copies.
9 * Permission to modify the code and to distribute modified code is granted,
10 * provided the above notices are retained, and a notice that the code was
11 * modified is included with the above copyright notice.
13 * Author: Hans-J. Boehm (boehm@parc.xerox.com)
16 * These are functions on cords that do not need to understand their
17 * implementation. They serve also serve as example client code for
18 * cord_basics.
20 /* Boehm, December 8, 1995 1:53 pm PST */
21 # include <stdio.h>
22 # include <string.h>
23 # include <stdlib.h>
24 # include <stdarg.h>
25 # include "cord.h"
26 # include "ec.h"
27 # define I_HIDE_POINTERS /* So we get access to allocation lock. */
28 /* We use this for lazy file reading, */
29 /* so that we remain independent */
30 /* of the threads primitives. */
31 # include "gc.h"
33 /* For now we assume that pointer reads and writes are atomic, */
34 /* i.e. another thread always sees the state before or after */
35 /* a write. This might be false on a Motorola M68K with */
36 /* pointers that are not 32-bit aligned. But there probably */
37 /* aren't too many threads packages running on those. */
38 # define ATOMIC_WRITE(x,y) (x) = (y)
39 # define ATOMIC_READ(x) (*(x))
41 /* The standard says these are in stdio.h, but they aren't always: */
42 # ifndef SEEK_SET
43 # define SEEK_SET 0
44 # endif
45 # ifndef SEEK_END
46 # define SEEK_END 2
47 # endif
49 # define BUFSZ 2048 /* Size of stack allocated buffers when */
50 /* we want large buffers. */
52 typedef void (* oom_fn)(void);
54 # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
55 ABORT("Out of memory\n"); }
56 # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
58 CORD CORD_cat_char(CORD x, char c)
60 register char * string;
62 if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
63 string = GC_MALLOC_ATOMIC(2);
64 if (string == 0) OUT_OF_MEMORY;
65 string[0] = c;
66 string[1] = '\0';
67 return(CORD_cat_char_star(x, string, 1));
70 CORD CORD_catn(int nargs, ...)
72 register CORD result = CORD_EMPTY;
73 va_list args;
74 register int i;
76 va_start(args, nargs);
77 for (i = 0; i < nargs; i++) {
78 register CORD next = va_arg(args, CORD);
79 result = CORD_cat(result, next);
81 va_end(args);
82 return(result);
85 typedef struct {
86 size_t len;
87 size_t count;
88 char * buf;
89 } CORD_fill_data;
91 int CORD_fill_proc(char c, void * client_data)
93 register CORD_fill_data * d = (CORD_fill_data *)client_data;
94 register size_t count = d -> count;
96 (d -> buf)[count] = c;
97 d -> count = ++count;
98 if (count >= d -> len) {
99 return(1);
100 } else {
101 return(0);
105 int CORD_batched_fill_proc(const char * s, void * client_data)
107 register CORD_fill_data * d = (CORD_fill_data *)client_data;
108 register size_t count = d -> count;
109 register size_t max = d -> len;
110 register char * buf = d -> buf;
111 register const char * t = s;
113 while((buf[count] = *t++) != '\0') {
114 count++;
115 if (count >= max) {
116 d -> count = count;
117 return(1);
120 d -> count = count;
121 return(0);
124 /* Fill buf with len characters starting at i. */
125 /* Assumes len characters are available. */
126 void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
128 CORD_fill_data fd;
130 fd.len = len;
131 fd.buf = buf;
132 fd.count = 0;
133 (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
136 int CORD_cmp(CORD x, CORD y)
138 CORD_pos xpos;
139 CORD_pos ypos;
140 register size_t avail, yavail;
142 if (y == CORD_EMPTY) return(x != CORD_EMPTY);
143 if (x == CORD_EMPTY) return(-1);
144 if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
145 CORD_set_pos(xpos, x, 0);
146 CORD_set_pos(ypos, y, 0);
147 for(;;) {
148 if (!CORD_pos_valid(xpos)) {
149 if (CORD_pos_valid(ypos)) {
150 return(-1);
151 } else {
152 return(0);
155 if (!CORD_pos_valid(ypos)) {
156 return(1);
158 if ((avail = CORD_pos_chars_left(xpos)) <= 0
159 || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
160 register char xcurrent = CORD_pos_fetch(xpos);
161 register char ycurrent = CORD_pos_fetch(ypos);
162 if (xcurrent != ycurrent) return(xcurrent - ycurrent);
163 CORD_next(xpos);
164 CORD_next(ypos);
165 } else {
166 /* process as many characters as we can */
167 register int result;
169 if (avail > yavail) avail = yavail;
170 result = strncmp(CORD_pos_cur_char_addr(xpos),
171 CORD_pos_cur_char_addr(ypos), avail);
172 if (result != 0) return(result);
173 CORD_pos_advance(xpos, avail);
174 CORD_pos_advance(ypos, avail);
179 int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
181 CORD_pos xpos;
182 CORD_pos ypos;
183 register size_t count;
184 register long avail, yavail;
186 CORD_set_pos(xpos, x, x_start);
187 CORD_set_pos(ypos, y, y_start);
188 for(count = 0; count < len;) {
189 if (!CORD_pos_valid(xpos)) {
190 if (CORD_pos_valid(ypos)) {
191 return(-1);
192 } else {
193 return(0);
196 if (!CORD_pos_valid(ypos)) {
197 return(1);
199 if ((avail = CORD_pos_chars_left(xpos)) <= 0
200 || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
201 register char xcurrent = CORD_pos_fetch(xpos);
202 register char ycurrent = CORD_pos_fetch(ypos);
203 if (xcurrent != ycurrent) return(xcurrent - ycurrent);
204 CORD_next(xpos);
205 CORD_next(ypos);
206 count++;
207 } else {
208 /* process as many characters as we can */
209 register int result;
211 if (avail > yavail) avail = yavail;
212 count += avail;
213 if (count > len) avail -= (count - len);
214 result = strncmp(CORD_pos_cur_char_addr(xpos),
215 CORD_pos_cur_char_addr(ypos), (size_t)avail);
216 if (result != 0) return(result);
217 CORD_pos_advance(xpos, (size_t)avail);
218 CORD_pos_advance(ypos, (size_t)avail);
221 return(0);
224 char * CORD_to_char_star(CORD x)
226 register size_t len = CORD_len(x);
227 char * result = GC_MALLOC_ATOMIC(len + 1);
229 if (result == 0) OUT_OF_MEMORY;
230 CORD_fill_buf(x, 0, len, result);
231 result[len] = '\0';
232 return(result);
235 CORD CORD_from_char_star(const char *s)
237 char * result;
238 size_t len = strlen(s);
240 if (0 == len) return(CORD_EMPTY);
241 result = GC_MALLOC_ATOMIC(len + 1);
242 if (result == 0) OUT_OF_MEMORY;
243 memcpy(result, s, len+1);
244 return(result);
247 const char * CORD_to_const_char_star(CORD x)
249 if (x == 0) return("");
250 if (CORD_IS_STRING(x)) return((const char *)x);
251 return(CORD_to_char_star(x));
254 char CORD_fetch(CORD x, size_t i)
256 CORD_pos xpos;
258 CORD_set_pos(xpos, x, i);
259 if (!CORD_pos_valid(xpos)) ABORT("bad index?");
260 return(CORD_pos_fetch(xpos));
264 int CORD_put_proc(char c, void * client_data)
266 register FILE * f = (FILE *)client_data;
268 return(putc(c, f) == EOF);
271 int CORD_batched_put_proc(const char * s, void * client_data)
273 register FILE * f = (FILE *)client_data;
275 return(fputs(s, f) == EOF);
279 int CORD_put(CORD x, FILE * f)
281 if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
282 return(EOF);
283 } else {
284 return(1);
288 typedef struct {
289 size_t pos; /* Current position in the cord */
290 char target; /* Character we're looking for */
291 } chr_data;
293 int CORD_chr_proc(char c, void * client_data)
295 register chr_data * d = (chr_data *)client_data;
297 if (c == d -> target) return(1);
298 (d -> pos) ++;
299 return(0);
302 int CORD_rchr_proc(char c, void * client_data)
304 register chr_data * d = (chr_data *)client_data;
306 if (c == d -> target) return(1);
307 (d -> pos) --;
308 return(0);
311 int CORD_batched_chr_proc(const char *s, void * client_data)
313 register chr_data * d = (chr_data *)client_data;
314 register char * occ = strchr(s, d -> target);
316 if (occ == 0) {
317 d -> pos += strlen(s);
318 return(0);
319 } else {
320 d -> pos += occ - s;
321 return(1);
325 size_t CORD_chr(CORD x, size_t i, int c)
327 chr_data d;
329 d.pos = i;
330 d.target = c;
331 if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
332 return(d.pos);
333 } else {
334 return(CORD_NOT_FOUND);
338 size_t CORD_rchr(CORD x, size_t i, int c)
340 chr_data d;
342 d.pos = i;
343 d.target = c;
344 if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
345 return(d.pos);
346 } else {
347 return(CORD_NOT_FOUND);
351 /* Find the first occurrence of s in x at position start or later. */
352 /* This uses an asymptotically poor algorithm, which should typically */
353 /* perform acceptably. We compare the first few characters directly, */
354 /* and call CORD_ncmp whenever there is a partial match. */
355 /* This has the advantage that we allocate very little, or not at all. */
356 /* It's very fast if there are few close misses. */
357 size_t CORD_str(CORD x, size_t start, CORD s)
359 CORD_pos xpos;
360 size_t xlen = CORD_len(x);
361 size_t slen;
362 register size_t start_len;
363 const char * s_start;
364 unsigned long s_buf = 0; /* The first few characters of s */
365 unsigned long x_buf = 0; /* Start of candidate substring. */
366 /* Initialized only to make compilers */
367 /* happy. */
368 unsigned long mask = 0;
369 register size_t i;
370 register size_t match_pos;
372 if (s == CORD_EMPTY) return(start);
373 if (CORD_IS_STRING(s)) {
374 s_start = s;
375 slen = strlen(s);
376 } else {
377 s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long)));
378 slen = CORD_len(s);
380 if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
381 start_len = slen;
382 if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
383 CORD_set_pos(xpos, x, start);
384 for (i = 0; i < start_len; i++) {
385 mask <<= 8;
386 mask |= 0xff;
387 s_buf <<= 8;
388 s_buf |= s_start[i];
389 x_buf <<= 8;
390 x_buf |= CORD_pos_fetch(xpos);
391 CORD_next(xpos);
393 for (match_pos = start; ; match_pos++) {
394 if ((x_buf & mask) == s_buf) {
395 if (slen == start_len ||
396 CORD_ncmp(x, match_pos + start_len,
397 s, start_len, slen - start_len) == 0) {
398 return(match_pos);
401 if ( match_pos == xlen - slen ) {
402 return(CORD_NOT_FOUND);
404 x_buf <<= 8;
405 x_buf |= CORD_pos_fetch(xpos);
406 CORD_next(xpos);
410 void CORD_ec_flush_buf(CORD_ec x)
412 register size_t len = x[0].ec_bufptr - x[0].ec_buf;
413 char * s;
415 if (len == 0) return;
416 s = GC_MALLOC_ATOMIC(len+1);
417 memcpy(s, x[0].ec_buf, len);
418 s[len] = '\0';
419 x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
420 x[0].ec_bufptr = x[0].ec_buf;
423 void CORD_ec_append_cord(CORD_ec x, CORD s)
425 CORD_ec_flush_buf(x);
426 x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
429 /*ARGSUSED*/
430 char CORD_nul_func(size_t i, void * client_data)
432 return((char)(unsigned long)client_data);
436 CORD CORD_chars(char c, size_t i)
438 return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
441 CORD CORD_from_file_eager(FILE * f)
443 register int c;
444 CORD_ec ecord;
446 CORD_ec_init(ecord);
447 for(;;) {
448 c = getc(f);
449 if (c == 0) {
450 /* Append the right number of NULs */
451 /* Note that any string of NULs is rpresented in 4 words, */
452 /* independent of its length. */
453 register size_t count = 1;
455 CORD_ec_flush_buf(ecord);
456 while ((c = getc(f)) == 0) count++;
457 ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count));
459 if (c == EOF) break;
460 CORD_ec_append(ecord, c);
462 (void) fclose(f);
463 return(CORD_balance(CORD_ec_to_cord(ecord)));
466 /* The state maintained for a lazily read file consists primarily */
467 /* of a large direct-mapped cache of previously read values. */
468 /* We could rely more on stdio buffering. That would have 2 */
469 /* disadvantages: */
470 /* 1) Empirically, not all fseek implementations preserve the */
471 /* buffer whenever they could. */
472 /* 2) It would fail if 2 different sections of a long cord */
473 /* were being read alternately. */
474 /* We do use the stdio buffer for read ahead. */
475 /* To guarantee thread safety in the presence of atomic pointer */
476 /* writes, cache lines are always replaced, and never modified in */
477 /* place. */
479 # define LOG_CACHE_SZ 14
480 # define CACHE_SZ (1 << LOG_CACHE_SZ)
481 # define LOG_LINE_SZ 9
482 # define LINE_SZ (1 << LOG_LINE_SZ)
484 typedef struct {
485 size_t tag;
486 char data[LINE_SZ];
487 /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */
488 } cache_line;
490 typedef struct {
491 FILE * lf_file;
492 size_t lf_current; /* Current file pointer value */
493 cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ];
494 } lf_state;
496 # define MOD_CACHE_SZ(n) ((n) & (CACHE_SZ - 1))
497 # define DIV_CACHE_SZ(n) ((n) >> LOG_CACHE_SZ)
498 # define MOD_LINE_SZ(n) ((n) & (LINE_SZ - 1))
499 # define DIV_LINE_SZ(n) ((n) >> LOG_LINE_SZ)
500 # define LINE_START(n) ((n) & ~(LINE_SZ - 1))
502 typedef struct {
503 lf_state * state;
504 size_t file_pos; /* Position of needed character. */
505 cache_line * new_cache;
506 } refill_data;
508 /* Executed with allocation lock. */
509 static char refill_cache(client_data)
510 refill_data * client_data;
512 register lf_state * state = client_data -> state;
513 register size_t file_pos = client_data -> file_pos;
514 FILE *f = state -> lf_file;
515 size_t line_start = LINE_START(file_pos);
516 size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos));
517 cache_line * new_cache = client_data -> new_cache;
519 if (line_start != state -> lf_current
520 && fseek(f, line_start, SEEK_SET) != 0) {
521 ABORT("fseek failed");
523 if (fread(new_cache -> data, sizeof(char), LINE_SZ, f)
524 <= file_pos - line_start) {
525 ABORT("fread failed");
527 new_cache -> tag = DIV_LINE_SZ(file_pos);
528 /* Store barrier goes here. */
529 ATOMIC_WRITE(state -> lf_cache[line_no], new_cache);
530 state -> lf_current = line_start + LINE_SZ;
531 return(new_cache->data[MOD_LINE_SZ(file_pos)]);
534 char CORD_lf_func(size_t i, void * client_data)
536 register lf_state * state = (lf_state *)client_data;
537 register cache_line * volatile * cl_addr =
538 &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]);
539 register cache_line * cl = (cache_line *)ATOMIC_READ(cl_addr);
541 if (cl == 0 || cl -> tag != DIV_LINE_SZ(i)) {
542 /* Cache miss */
543 refill_data rd;
545 rd.state = state;
546 rd.file_pos = i;
547 rd.new_cache = GC_NEW_ATOMIC(cache_line);
548 if (rd.new_cache == 0) OUT_OF_MEMORY;
549 return((char)(GC_word)
550 GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd));
552 return(cl -> data[MOD_LINE_SZ(i)]);
555 /*ARGSUSED*/
556 void CORD_lf_close_proc(void * obj, void * client_data)
558 if (fclose(((lf_state *)obj) -> lf_file) != 0) {
559 ABORT("CORD_lf_close_proc: fclose failed");
563 CORD CORD_from_file_lazy_inner(FILE * f, size_t len)
565 register lf_state * state = GC_NEW(lf_state);
566 register int i;
568 if (state == 0) OUT_OF_MEMORY;
569 if (len != 0) {
570 /* Dummy read to force buffer allocation. */
571 /* This greatly increases the probability */
572 /* of avoiding deadlock if buffer allocation */
573 /* is redirected to GC_malloc and the */
574 /* world is multithreaded. */
575 char buf[1];
577 (void) fread(buf, 1, 1, f);
578 rewind(f);
580 state -> lf_file = f;
581 for (i = 0; i < CACHE_SZ/LINE_SZ; i++) {
582 state -> lf_cache[i] = 0;
584 state -> lf_current = 0;
585 GC_REGISTER_FINALIZER(state, CORD_lf_close_proc, 0, 0, 0);
586 return(CORD_from_fn(CORD_lf_func, state, len));
589 CORD CORD_from_file_lazy(FILE * f)
591 register long len;
593 if (fseek(f, 0l, SEEK_END) != 0) {
594 ABORT("Bad fd argument - fseek failed");
596 if ((len = ftell(f)) < 0) {
597 ABORT("Bad fd argument - ftell failed");
599 rewind(f);
600 return(CORD_from_file_lazy_inner(f, (size_t)len));
603 # define LAZY_THRESHOLD (128*1024 + 1)
605 CORD CORD_from_file(FILE * f)
607 register long len;
609 if (fseek(f, 0l, SEEK_END) != 0) {
610 ABORT("Bad fd argument - fseek failed");
612 if ((len = ftell(f)) < 0) {
613 ABORT("Bad fd argument - ftell failed");
615 rewind(f);
616 if (len < LAZY_THRESHOLD) {
617 return(CORD_from_file_eager(f));
618 } else {
619 return(CORD_from_file_lazy_inner(f, (size_t)len));