2 /* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
40 void *mapread(int fd
, int len
);
41 int unmap(void *, int len
);
44 const int minus_one
= -1;
50 class index_search_item
: public search_item
{
51 search_item
*out_of_date_files
;
61 char *filename_buffer
;
63 char **common_words_table
;
64 int common_words_table_size
;
65 const char *ignore_fields
;
68 const char *do_verify();
69 const int *search1(const char **pp
, const char *end
);
70 const int *search(const char *ptr
, int length
, int **temp_listp
);
71 const char *munge_filename(const char *);
72 void read_common_words_file();
73 void add_out_of_date_file(int fd
, const char *filename
, int fid
);
75 index_search_item(const char *, int);
78 search_item_iterator
*make_search_item_iterator(const char *);
81 int next_filename_id() const;
82 friend class index_search_item_iterator
;
85 class index_search_item_iterator
: public search_item_iterator
{
86 index_search_item
*indx
;
87 search_item_iterator
*out_of_date_files_iter
;
88 search_item
*next_out_of_date_file
;
89 const int *found_list
;
93 linear_searcher searcher
;
95 int get_tag(int tagno
, const linear_searcher
&, const char **, int *,
98 index_search_item_iterator(index_search_item
*, const char *);
99 ~index_search_item_iterator();
100 int next(const linear_searcher
&, const char **, int *, reference_id
*);
104 index_search_item::index_search_item(const char *filename
, int fid
)
105 : search_item(filename
, fid
), out_of_date_files(0), key_buffer(0),
106 filename_buffer(0), filename_buflen(0), common_words_table(0),
107 map_addr(0), map_len(0), buffer(0)
111 index_search_item::~index_search_item()
116 if (unmap(map_addr
, map_len
) < 0)
117 error("unmap: %1", strerror(errno
));
119 while (out_of_date_files
) {
120 search_item
*tem
= out_of_date_files
;
121 out_of_date_files
= out_of_date_files
->next
;
124 a_delete filename_buffer
;
126 if (common_words_table
) {
127 for (int i
= 0; i
< common_words_table_size
; i
++)
128 a_delete common_words_table
[i
];
129 a_delete common_words_table
;
136 file_closer(int &fd
) : fdp(&fd
) { }
137 ~file_closer() { close(*fdp
); }
140 int index_search_item::load(int fd
)
142 file_closer
fd_closer(fd
); // close fd on return
144 if (fstat(fd
, &sb
) < 0) {
145 error("can't fstat `%1': %2", name
, strerror(errno
));
148 if (!S_ISREG(sb
.st_mode
)) {
149 error("`%1' is not a regular file", name
);
153 int size
= int(sb
.st_size
);
155 map_addr
= mapread(fd
, size
);
157 addr
= (char *)map_addr
;
161 addr
= buffer
= (char *)malloc(size
);
163 error("can't allocate buffer for `%1'", name
);
167 int bytes_to_read
= size
;
168 while (bytes_to_read
> 0) {
169 int nread
= read(fd
, ptr
, bytes_to_read
);
171 error("unexpected EOF on `%1'", name
);
175 error("read error on `%1': %2", name
, strerror(errno
));
178 bytes_to_read
-= nread
;
182 header
= *(index_header
*)addr
;
183 if (header
.magic
!= INDEX_MAGIC
) {
184 error("`%1' is not an index file: wrong magic number", name
);
187 if (header
.version
!= INDEX_VERSION
) {
188 error("version number in `%1' is wrong: was %2, should be %3",
189 name
, header
.version
, INDEX_VERSION
);
192 int sz
= (header
.tags_size
* sizeof(tag
)
193 + header
.lists_size
* sizeof(int)
194 + header
.table_size
* sizeof(int)
195 + header
.strings_size
198 error("size of `%1' is wrong: was %2, should be %3",
202 tags
= (tag
*)(addr
+ sizeof(header
));
203 lists
= (int *)(tags
+ header
.tags_size
);
204 table
= (int *)(lists
+ header
.lists_size
);
205 pool
= (char *)(table
+ header
.table_size
);
206 ignore_fields
= strchr(strchr(pool
, '\0') + 1, '\0') + 1;
207 key_buffer
= new char[header
.truncate
];
208 read_common_words_file();
212 const char *index_search_item::do_verify()
216 if (lists
[header
.lists_size
- 1] >= 0)
217 return "last list element not negative";
219 for (i
= 0; i
< header
.table_size
; i
++) {
221 if (li
>= header
.lists_size
)
222 return "bad list index";
224 for (int *ptr
= lists
+ li
; *ptr
>= 0; ptr
++) {
225 if (*ptr
>= header
.tags_size
)
226 return "bad tag index";
227 if (*ptr
>= ptr
[1] && ptr
[1] >= 0)
228 return "list not ordered";
232 for (i
= 0; i
< header
.tags_size
; i
++) {
233 if (tags
[i
].filename_index
>= header
.strings_size
)
234 return "bad index in tags";
235 if (tags
[i
].length
< 0)
236 return "bad length in tags";
237 if (tags
[i
].start
< 0)
238 return "bad start in tags";
240 if (pool
[header
.strings_size
- 1] != '\0')
241 return "last character in pool not nul";
245 int index_search_item::verify()
247 const char *reason
= do_verify();
250 error("`%1' is bad: %2", name
, reason
);
254 int index_search_item::next_filename_id() const
256 return filename_id
+ header
.strings_size
+ 1;
259 search_item_iterator
*index_search_item::make_search_item_iterator(
262 return new index_search_item_iterator(this, query
);
265 search_item
*make_index_search_item(const char *filename
, int fid
)
267 char *index_filename
= new char[strlen(filename
) + sizeof(INDEX_SUFFIX
)];
268 strcpy(index_filename
, filename
);
269 strcat(index_filename
, INDEX_SUFFIX
);
270 int fd
= open(index_filename
, O_RDONLY
);
273 index_search_item
*item
= new index_search_item(index_filename
, fid
);
274 a_delete index_filename
;
275 if (!item
->load(fd
)) {
280 else if (verify_flag
&& !item
->verify()) {
291 index_search_item_iterator::index_search_item_iterator(index_search_item
*ind
,
293 : indx(ind
), buf(0), buflen(0), temp_list(0), query(strsave(q
)),
294 searcher(q
, strlen(q
), ind
->ignore_fields
, ind
->header
.truncate
),
295 out_of_date_files_iter(0), next_out_of_date_file(0)
297 found_list
= indx
->search(q
, strlen(q
), &temp_list
);
299 found_list
= &minus_one
;
300 warning("all keys would have been discarded in constructing index `%1'",
305 index_search_item_iterator::~index_search_item_iterator()
310 delete out_of_date_files_iter
;
313 int index_search_item_iterator::next(const linear_searcher
&,
314 const char **pp
, int *lenp
,
319 int tagno
= *found_list
;
323 if (get_tag(tagno
, searcher
, pp
, lenp
, ridp
))
327 next_out_of_date_file
= indx
->out_of_date_files
;
329 while (next_out_of_date_file
) {
330 if (out_of_date_files_iter
== 0)
331 out_of_date_files_iter
332 = next_out_of_date_file
->make_search_item_iterator(query
);
333 if (out_of_date_files_iter
->next(searcher
, pp
, lenp
, ridp
))
335 delete out_of_date_files_iter
;
336 out_of_date_files_iter
= 0;
337 next_out_of_date_file
= next_out_of_date_file
->next
;
342 int index_search_item_iterator::get_tag(int tagno
,
343 const linear_searcher
&searcher
,
344 const char **pp
, int *lenp
,
347 if (tagno
< 0 || tagno
>= indx
->header
.tags_size
) {
348 error("bad tag number");
351 tag
*tp
= indx
->tags
+ tagno
;
352 const char *filename
= indx
->munge_filename(indx
->pool
+ tp
->filename_index
);
353 int fd
= open(filename
, O_RDONLY
);
355 error("can't open `%1': %2", filename
, strerror(errno
));
359 if (fstat(fd
, &sb
) < 0) {
360 error("can't fstat: %1", strerror(errno
));
364 time_t mtime
= sb
.st_mtime
;
365 if (mtime
> indx
->mtime
) {
366 indx
->add_out_of_date_file(fd
, filename
,
367 indx
->filename_id
+ tp
->filename_index
);
371 FILE *fp
= fdopen(fd
, "r");
373 error("fdopen failed");
377 if (tp
->start
!= 0 && fseek(fp
, long(tp
->start
), 0) < 0)
378 error("can't seek on `%1': %2", filename
, strerror(errno
));
380 int length
= tp
->length
;
384 if (fstat(fileno(fp
), &sb
) < 0) {
385 error("can't stat `%1': %2", filename
, strerror(errno
));
388 else if ((sb
.st_mode
& S_IFMT
) != S_IFREG
) {
389 error("`%1' is not a regular file", filename
);
393 length
= int(sb
.st_size
);
396 if (length
+ 2 > buflen
) {
399 buf
= new char[buflen
];
401 if (fread(buf
+ 1, 1, length
, fp
) != length
)
402 error("fread on `%1' failed: %2", filename
, strerror(errno
));
405 buf
[length
+ 1] = '\n';
406 res
= searcher
.search(buf
+ 1, buf
+ 2 + length
, pp
, lenp
);
408 *ridp
= reference_id(indx
->filename_id
+ tp
->filename_index
,
417 const char *index_search_item::munge_filename(const char *filename
)
419 if (filename
[0] == '/')
421 const char *cwd
= pool
;
422 int need_slash
= (cwd
[0] != 0 && strchr(cwd
, '\0')[-1] != '/');
423 int len
= strlen(cwd
) + strlen(filename
) + need_slash
+ 1;
424 if (len
> filename_buflen
) {
425 a_delete filename_buffer
;
426 filename_buflen
= len
;
427 filename_buffer
= new char[len
];
429 strcpy(filename_buffer
, cwd
);
431 strcat(filename_buffer
, "/");
432 strcat(filename_buffer
, filename
);
433 return filename_buffer
;
436 const int *index_search_item::search1(const char **pp
, const char *end
)
438 while (*pp
< end
&& !csalnum(**pp
))
442 const char *start
= *pp
;
443 while (*pp
< end
&& csalnum(**pp
))
445 int len
= *pp
- start
;
446 if (len
< header
.shortest
)
448 if (len
> header
.truncate
)
449 len
= header
.truncate
;
451 for (int i
= 0; i
< len
; i
++)
452 if (csdigit(start
[i
]))
453 key_buffer
[i
] = start
[i
];
455 key_buffer
[i
] = cmlower(start
[i
]);
458 if (is_number
&& !(len
== 4 && start
[0] == '1' && start
[1] == '9'))
460 unsigned hc
= hash(key_buffer
, len
);
461 if (common_words_table
) {
462 for (int h
= hc
% common_words_table_size
;
463 common_words_table
[h
];
465 if (strlen(common_words_table
[h
]) == len
466 && memcmp(common_words_table
[h
], key_buffer
, len
) == 0)
469 h
= common_words_table_size
;
472 int li
= table
[int(hc
% header
.table_size
)];
473 return li
< 0 ? &minus_one
: lists
+ li
;
476 static void merge(int *result
, const int *s1
, const int *s2
)
478 for (; *s1
>= 0; s1
++) {
479 while (*s2
>= 0 && *s2
< *s1
)
487 const int *index_search_item::search(const char *ptr
, int length
,
490 const char *end
= ptr
+ length
;
492 a_delete
*temp_listp
;
495 const int *first_list
= 0;
496 while (ptr
< end
&& (first_list
= search1(&ptr
, end
)) == 0)
502 const int *second_list
= 0;
503 while (ptr
< end
&& (second_list
= search1(&ptr
, end
)) == 0)
507 if (*second_list
< 0)
509 for (const int *p
= first_list
; *p
>= 0; p
++)
511 int len
= p
- first_list
;
512 for (p
= second_list
; *p
>= 0; p
++)
514 if (p
- second_list
< len
)
515 len
= p
- second_list
;
516 int *matches
= new int[len
+ 1];
517 merge(matches
, first_list
, second_list
);
519 const int *list
= search1(&ptr
, end
);
525 merge(matches
, matches
, list
);
532 *temp_listp
= matches
;
536 void index_search_item::read_common_words_file()
538 if (header
.common
<= 0)
540 const char *common_words_file
= munge_filename(strchr(pool
, '\0') + 1);
542 FILE *fp
= fopen(common_words_file
, "r");
544 error("can't open `%1': %2", common_words_file
, strerror(errno
));
547 common_words_table_size
= 2*header
.common
+ 1;
548 while (!is_prime(common_words_table_size
))
549 common_words_table_size
++;
550 common_words_table
= new char *[common_words_table_size
];
551 for (int i
= 0; i
< common_words_table_size
; i
++)
552 common_words_table
[i
] = 0;
557 while (c
!= EOF
&& !csalnum(c
))
562 if (key_len
< header
.truncate
)
563 key_buffer
[key_len
++] = cmlower(c
);
565 } while (c
!= EOF
&& csalnum(c
));
566 if (key_len
>= header
.shortest
) {
567 int h
= hash(key_buffer
, key_len
) % common_words_table_size
;
568 while (common_words_table
[h
]) {
570 h
= common_words_table_size
;
573 common_words_table
[h
] = new char[key_len
+ 1];
574 memcpy(common_words_table
[h
], key_buffer
, key_len
);
575 common_words_table
[h
][key_len
] = '\0';
577 if (++count
>= header
.common
)
586 void index_search_item::add_out_of_date_file(int fd
, const char *filename
,
589 for (search_item
**pp
= &out_of_date_files
; *pp
; pp
= &(*pp
)->next
)
590 if ((*pp
)->is_named(filename
))
592 *pp
= make_linear_search_item(fd
, filename
, fid
);
593 warning("`%1' modified since `%2' created", filename
, name
);
596 void index_search_item::check_files()
598 const char *pool_end
= pool
+ header
.strings_size
;
599 for (const char *ptr
= strchr(ignore_fields
, '\0') + 1;
601 ptr
= strchr(ptr
, '\0') + 1) {
602 const char *path
= munge_filename(ptr
);
604 if (stat(path
, &sb
) < 0)
605 error("can't stat `%1': %2", path
, strerror(errno
));
606 else if (sb
.st_mtime
> mtime
) {
607 int fd
= open(path
, O_RDONLY
);
609 error("can't open `%1': %2", path
, strerror(errno
));
611 add_out_of_date_file(fd
, path
, filename_id
+ (ptr
- pool
));