2 /* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2004
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
42 void *mapread(int fd
, int len
);
43 int unmap(void *, int len
);
55 class index_search_item
: public search_item
{
56 search_item
*out_of_date_files
;
66 char *filename_buffer
;
68 char **common_words_table
;
69 int common_words_table_size
;
70 const char *ignore_fields
;
73 const char *do_verify();
74 const int *search1(const char **pp
, const char *end
);
75 const int *search(const char *ptr
, int length
, int **temp_listp
);
76 const char *munge_filename(const char *);
77 void read_common_words_file();
78 void add_out_of_date_file(int fd
, const char *filename
, int fid
);
80 index_search_item(const char *, int);
83 search_item_iterator
*make_search_item_iterator(const char *);
86 int next_filename_id() const;
87 friend class index_search_item_iterator
;
90 class index_search_item_iterator
: public search_item_iterator
{
91 index_search_item
*indx
;
92 search_item_iterator
*out_of_date_files_iter
;
93 search_item
*next_out_of_date_file
;
94 const int *found_list
;
98 linear_searcher searcher
;
100 int get_tag(int tagno
, const linear_searcher
&, const char **, int *,
103 index_search_item_iterator(index_search_item
*, const char *);
104 ~index_search_item_iterator();
105 int next(const linear_searcher
&, const char **, int *, reference_id
*);
109 index_search_item::index_search_item(const char *filename
, int fid
)
110 : search_item(filename
, fid
), out_of_date_files(0), buffer(0), map_addr(0),
111 map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0),
112 common_words_table(0)
116 index_search_item::~index_search_item()
121 if (unmap(map_addr
, map_len
) < 0)
122 error("unmap: %1", strerror(errno
));
124 while (out_of_date_files
) {
125 search_item
*tem
= out_of_date_files
;
126 out_of_date_files
= out_of_date_files
->next
;
129 a_delete filename_buffer
;
131 if (common_words_table
) {
132 for (int i
= 0; i
< common_words_table_size
; i
++)
133 a_delete common_words_table
[i
];
134 a_delete common_words_table
;
141 file_closer(int &fd
) : fdp(&fd
) { }
142 ~file_closer() { close(*fdp
); }
145 // Tell the compiler that a variable is intentionally unused.
146 inline void unused(void *) { }
148 int index_search_item::load(int fd
)
150 file_closer
fd_closer(fd
); // close fd on return
153 if (fstat(fd
, &sb
) < 0) {
154 error("can't fstat `%1': %2", name
, strerror(errno
));
157 if (!S_ISREG(sb
.st_mode
)) {
158 error("`%1' is not a regular file", name
);
162 int size
= int(sb
.st_size
);
164 map_addr
= mapread(fd
, size
);
166 addr
= (char *)map_addr
;
170 addr
= buffer
= (char *)malloc(size
);
172 error("can't allocate buffer for `%1'", name
);
176 int bytes_to_read
= size
;
177 while (bytes_to_read
> 0) {
178 int nread
= read(fd
, ptr
, bytes_to_read
);
180 error("unexpected EOF on `%1'", name
);
184 error("read error on `%1': %2", name
, strerror(errno
));
187 bytes_to_read
-= nread
;
191 header
= *(index_header
*)addr
;
192 if (header
.magic
!= INDEX_MAGIC
) {
193 error("`%1' is not an index file: wrong magic number", name
);
196 if (header
.version
!= INDEX_VERSION
) {
197 error("version number in `%1' is wrong: was %2, should be %3",
198 name
, header
.version
, INDEX_VERSION
);
201 int sz
= (header
.tags_size
* sizeof(tag
)
202 + header
.lists_size
* sizeof(int)
203 + header
.table_size
* sizeof(int)
204 + header
.strings_size
207 error("size of `%1' is wrong: was %2, should be %3",
211 tags
= (tag
*)(addr
+ sizeof(header
));
212 lists
= (int *)(tags
+ header
.tags_size
);
213 table
= (int *)(lists
+ header
.lists_size
);
214 pool
= (char *)(table
+ header
.table_size
);
215 ignore_fields
= strchr(strchr(pool
, '\0') + 1, '\0') + 1;
216 key_buffer
= new char[header
.truncate
];
217 read_common_words_file();
221 const char *index_search_item::do_verify()
225 if (lists
[header
.lists_size
- 1] >= 0)
226 return "last list element not negative";
228 for (i
= 0; i
< header
.table_size
; i
++) {
230 if (li
>= header
.lists_size
)
231 return "bad list index";
233 for (int *ptr
= lists
+ li
; *ptr
>= 0; ptr
++) {
234 if (*ptr
>= header
.tags_size
)
235 return "bad tag index";
236 if (*ptr
>= ptr
[1] && ptr
[1] >= 0)
237 return "list not ordered";
241 for (i
= 0; i
< header
.tags_size
; i
++) {
242 if (tags
[i
].filename_index
>= header
.strings_size
)
243 return "bad index in tags";
244 if (tags
[i
].length
< 0)
245 return "bad length in tags";
246 if (tags
[i
].start
< 0)
247 return "bad start in tags";
249 if (pool
[header
.strings_size
- 1] != '\0')
250 return "last character in pool not nul";
254 int index_search_item::verify()
256 const char *reason
= do_verify();
259 error("`%1' is bad: %2", name
, reason
);
263 int index_search_item::next_filename_id() const
265 return filename_id
+ header
.strings_size
+ 1;
268 search_item_iterator
*index_search_item::make_search_item_iterator(
271 return new index_search_item_iterator(this, query
);
274 search_item
*make_index_search_item(const char *filename
, int fid
)
276 char *index_filename
= new char[strlen(filename
) + sizeof(INDEX_SUFFIX
)];
277 strcpy(index_filename
, filename
);
278 strcat(index_filename
, INDEX_SUFFIX
);
279 int fd
= open(index_filename
, O_RDONLY
| O_BINARY
);
282 index_search_item
*item
= new index_search_item(index_filename
, fid
);
283 a_delete index_filename
;
284 if (!item
->load(fd
)) {
289 else if (verify_flag
&& !item
->verify()) {
300 index_search_item_iterator::index_search_item_iterator(index_search_item
*ind
,
302 : indx(ind
), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0),
304 searcher(q
, strlen(q
), ind
->ignore_fields
, ind
->header
.truncate
),
307 found_list
= indx
->search(q
, strlen(q
), &temp_list
);
309 found_list
= &minus_one
;
310 warning("all keys would have been discarded in constructing index `%1'",
315 index_search_item_iterator::~index_search_item_iterator()
320 delete out_of_date_files_iter
;
323 int index_search_item_iterator::next(const linear_searcher
&,
324 const char **pp
, int *lenp
,
329 int tagno
= *found_list
;
333 if (get_tag(tagno
, searcher
, pp
, lenp
, ridp
))
337 next_out_of_date_file
= indx
->out_of_date_files
;
339 while (next_out_of_date_file
) {
340 if (out_of_date_files_iter
== 0)
341 out_of_date_files_iter
342 = next_out_of_date_file
->make_search_item_iterator(query
);
343 if (out_of_date_files_iter
->next(searcher
, pp
, lenp
, ridp
))
345 delete out_of_date_files_iter
;
346 out_of_date_files_iter
= 0;
347 next_out_of_date_file
= next_out_of_date_file
->next
;
352 int index_search_item_iterator::get_tag(int tagno
,
353 const linear_searcher
&searchr
,
354 const char **pp
, int *lenp
,
357 if (tagno
< 0 || tagno
>= indx
->header
.tags_size
) {
358 error("bad tag number");
361 tag
*tp
= indx
->tags
+ tagno
;
362 const char *filename
= indx
->munge_filename(indx
->pool
+ tp
->filename_index
);
363 int fd
= open(filename
, O_RDONLY
| O_BINARY
);
365 error("can't open `%1': %2", filename
, strerror(errno
));
369 if (fstat(fd
, &sb
) < 0) {
370 error("can't fstat: %1", strerror(errno
));
374 time_t mtime
= sb
.st_mtime
;
375 if (mtime
> indx
->mtime
) {
376 indx
->add_out_of_date_file(fd
, filename
,
377 indx
->filename_id
+ tp
->filename_index
);
381 FILE *fp
= fdopen(fd
, FOPEN_RB
);
383 error("fdopen failed");
387 if (tp
->start
!= 0 && fseek(fp
, long(tp
->start
), 0) < 0)
388 error("can't seek on `%1': %2", filename
, strerror(errno
));
390 int length
= tp
->length
;
393 if (fstat(fileno(fp
), &sb
) < 0) {
394 error("can't stat `%1': %2", filename
, strerror(errno
));
397 else if (!S_ISREG(sb
.st_mode
)) {
398 error("`%1' is not a regular file", filename
);
402 length
= int(sb
.st_size
);
405 if (length
+ 2 > buflen
) {
408 buf
= new char[buflen
];
410 if (fread(buf
+ 1, 1, length
, fp
) != (size_t)length
)
411 error("fread on `%1' failed: %2", filename
, strerror(errno
));
414 // Remove the CR characters from CRLF pairs.
415 int sidx
= 1, didx
= 1;
416 for ( ; sidx
< length
+ 1; sidx
++, didx
++)
418 if (buf
[sidx
] == '\r')
420 if (buf
[++sidx
] != '\n')
426 buf
[didx
] = buf
[sidx
];
428 buf
[length
+ 1] = '\n';
429 res
= searchr
.search(buf
+ 1, buf
+ 2 + length
, pp
, lenp
);
431 *ridp
= reference_id(indx
->filename_id
+ tp
->filename_index
,
440 const char *index_search_item::munge_filename(const char *filename
)
442 if (IS_ABSOLUTE(filename
))
444 const char *cwd
= pool
;
445 int need_slash
= (cwd
[0] != 0
446 && strchr(DIR_SEPS
, strchr(cwd
, '\0')[-1]) == 0);
447 int len
= strlen(cwd
) + strlen(filename
) + need_slash
+ 1;
448 if (len
> filename_buflen
) {
449 a_delete filename_buffer
;
450 filename_buflen
= len
;
451 filename_buffer
= new char[len
];
453 strcpy(filename_buffer
, cwd
);
455 strcat(filename_buffer
, "/");
456 strcat(filename_buffer
, filename
);
457 return filename_buffer
;
460 const int *index_search_item::search1(const char **pp
, const char *end
)
462 while (*pp
< end
&& !csalnum(**pp
))
466 const char *start
= *pp
;
467 while (*pp
< end
&& csalnum(**pp
))
469 int len
= *pp
- start
;
470 if (len
< header
.shortest
)
472 if (len
> header
.truncate
)
473 len
= header
.truncate
;
475 for (int i
= 0; i
< len
; i
++)
476 if (csdigit(start
[i
]))
477 key_buffer
[i
] = start
[i
];
479 key_buffer
[i
] = cmlower(start
[i
]);
482 if (is_number
&& !(len
== 4 && start
[0] == '1' && start
[1] == '9'))
484 unsigned hc
= hash(key_buffer
, len
);
485 if (common_words_table
) {
486 for (int h
= hc
% common_words_table_size
;
487 common_words_table
[h
];
489 if (strlen(common_words_table
[h
]) == (size_t)len
490 && memcmp(common_words_table
[h
], key_buffer
, len
) == 0)
493 h
= common_words_table_size
;
496 int li
= table
[int(hc
% header
.table_size
)];
497 return li
< 0 ? &minus_one
: lists
+ li
;
500 static void merge(int *result
, const int *s1
, const int *s2
)
502 for (; *s1
>= 0; s1
++) {
503 while (*s2
>= 0 && *s2
< *s1
)
511 const int *index_search_item::search(const char *ptr
, int length
,
514 const char *end
= ptr
+ length
;
516 a_delete
*temp_listp
;
519 const int *first_list
= 0;
520 while (ptr
< end
&& (first_list
= search1(&ptr
, end
)) == 0)
526 const int *second_list
= 0;
527 while (ptr
< end
&& (second_list
= search1(&ptr
, end
)) == 0)
531 if (*second_list
< 0)
534 for (p
= first_list
; *p
>= 0; p
++)
536 int len
= p
- first_list
;
537 for (p
= second_list
; *p
>= 0; p
++)
539 if (p
- second_list
< len
)
540 len
= p
- second_list
;
541 int *matches
= new int[len
+ 1];
542 merge(matches
, first_list
, second_list
);
544 const int *list
= search1(&ptr
, end
);
550 merge(matches
, matches
, list
);
557 *temp_listp
= matches
;
561 void index_search_item::read_common_words_file()
563 if (header
.common
<= 0)
565 const char *common_words_file
= munge_filename(strchr(pool
, '\0') + 1);
567 FILE *fp
= fopen(common_words_file
, "r");
569 error("can't open `%1': %2", common_words_file
, strerror(errno
));
572 common_words_table_size
= 2*header
.common
+ 1;
573 while (!is_prime(common_words_table_size
))
574 common_words_table_size
++;
575 common_words_table
= new char *[common_words_table_size
];
576 for (int i
= 0; i
< common_words_table_size
; i
++)
577 common_words_table
[i
] = 0;
582 while (c
!= EOF
&& !csalnum(c
))
587 if (key_len
< header
.truncate
)
588 key_buffer
[key_len
++] = cmlower(c
);
590 } while (c
!= EOF
&& csalnum(c
));
591 if (key_len
>= header
.shortest
) {
592 int h
= hash(key_buffer
, key_len
) % common_words_table_size
;
593 while (common_words_table
[h
]) {
595 h
= common_words_table_size
;
598 common_words_table
[h
] = new char[key_len
+ 1];
599 memcpy(common_words_table
[h
], key_buffer
, key_len
);
600 common_words_table
[h
][key_len
] = '\0';
602 if (++count
>= header
.common
)
611 void index_search_item::add_out_of_date_file(int fd
, const char *filename
,
615 for (pp
= &out_of_date_files
; *pp
; pp
= &(*pp
)->next
)
616 if ((*pp
)->is_named(filename
))
618 *pp
= make_linear_search_item(fd
, filename
, fid
);
619 warning("`%1' modified since `%2' created", filename
, name
);
622 void index_search_item::check_files()
624 const char *pool_end
= pool
+ header
.strings_size
;
625 for (const char *ptr
= strchr(ignore_fields
, '\0') + 1;
627 ptr
= strchr(ptr
, '\0') + 1) {
628 const char *path
= munge_filename(ptr
);
630 if (stat(path
, &sb
) < 0)
631 error("can't stat `%1': %2", path
, strerror(errno
));
632 else if (sb
.st_mtime
> mtime
) {
633 int fd
= open(path
, O_RDONLY
| O_BINARY
);
635 error("can't open `%1': %2", path
, strerror(errno
));
637 add_out_of_date_file(fd
, path
, filename_id
+ (ptr
- pool
));