2 /* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
43 int load(int fd
, const char *filename
);
44 const char *get_start() const;
45 const char *get_end() const;
48 typedef unsigned char uchar
;
50 static uchar map
[256];
51 static uchar inv_map
[256][3];
57 static map_init the_map_init
;
61 for (int i
= 0; i
< 256; i
++)
62 map
[i
] = csalnum(i
) ? cmlower(i
) : '\0';
63 for (i
= 0; i
< 256; i
++) {
66 inv_map
[i
][1] = cmupper(i
);
69 else if (csdigit(i
)) {
84 bmpattern(const char *pattern
, int pattern_length
);
86 const char *search(const char *p
, const char *end
) const;
90 bmpattern::bmpattern(const char *pattern
, int pattern_length
)
95 for (i
= 0; i
< len
; i
++)
96 pat
[i
] = map
[uchar(pattern
[i
])];
97 for (i
= 0; i
< 256; i
++)
99 for (i
= 0; i
< len
; i
++)
100 for (const unsigned char *inv
= inv_map
[uchar(pat
[i
])]; *inv
; inv
++)
101 delta
[*inv
] = len
- i
- 1;
104 const char *bmpattern::search(const char *buf
, const char *end
) const
106 int buflen
= end
- buf
;
111 strend
= end
- len
*4;
114 const char *k
= buf
+ len
- 1;
115 const int *del
= delta
;
116 const char *pattern
= pat
;
119 int t
= del
[uchar(*k
)];
126 while (k
< end
&& del
[uchar(*k
)] != 0)
135 if (map
[uchar(*--s
)] != pattern
[--j
])
143 bmpattern::~bmpattern()
148 inline int bmpattern::length() const
154 static const char *find_end(const char *bufend
, const char *p
);
156 const char *linear_searcher::search_and_check(const bmpattern
*key
,
157 const char *buf
, const char *bufend
, const char **start
) const
159 assert(buf
[-1] == '\n');
160 assert(bufend
[-1] == '\n');
161 const char *ptr
= buf
;
163 const char *found
= key
->search(ptr
, bufend
);
166 if (check_match(buf
, bufend
, found
, key
->length(), &ptr
, start
))
172 static const char *skip_field(const char *end
, const char *p
)
176 if (p
== end
|| *p
== '%')
178 for (const char *q
= p
; *q
== ' ' || *q
== '\t'; q
++)
187 static const char *find_end(const char *bufend
, const char *p
)
193 for (const char *q
= p
; *q
== ' ' || *q
== '\t'; q
++)
203 int linear_searcher::check_match(const char *buf
, const char *bufend
,
204 const char *match
, int matchlen
,
205 const char **cont
, const char **start
) const
208 // The user is required to supply only the first truncate_len characters
209 // of the key. If truncate_len <= 0, he must supply all the key.
210 if ((truncate_len
<= 0 || matchlen
< truncate_len
)
211 && map
[uchar(match
[matchlen
])] != '\0')
214 // The character before the match must not be an alphanumeric
215 // character (unless the alphanumeric character follows one or two
216 // percent characters at the beginning of the line), nor must it be
217 // a percent character at the beginning of a line, nor a percent
218 // character following a percent character at the beginning of a
221 switch (match
- buf
) {
225 if (match
[-1] == '%' || map
[uchar(match
[-1])] != '\0')
229 if (map
[uchar(match
[-1])] != '\0' && match
[-2] != '%')
232 && (match
[-2] == '\n' || match
[-2] == '%'))
236 if (map
[uchar(match
[-1])] != '\0'
237 && !(match
[-2] == '%'
238 && (match
[-3] == '\n'
239 || (match
[-3] == '%' && match
[-4] == '\n'))))
242 && (match
[-2] == '\n'
243 || (match
[-2] == '%' && match
[-3] == '\n')))
247 const char *p
= match
;
251 if (!had_percent
&& p
[1] == '%') {
252 if (p
[2] != '\0' && strchr(ignore_fields
, p
[2]) != 0) {
253 *cont
= skip_field(bufend
, match
+ matchlen
);
265 for (const char *q
= p
- 1; *q
== ' ' || *q
== '\t'; q
--)
279 file_buffer::file_buffer()
280 : buffer(0), bufend(0)
284 file_buffer::~file_buffer()
289 const char *file_buffer::get_start() const
291 return buffer
? buffer
+ 4 : 0;
294 const char *file_buffer::get_end() const
299 int file_buffer::load(int fd
, const char *filename
)
302 if (fstat(fd
, &sb
) < 0)
303 error("can't fstat `%1': %2", filename
, strerror(errno
));
304 else if ((sb
.st_mode
& S_IFMT
) != S_IFREG
)
305 error("`%1' is not a regular file", filename
);
307 // We need one character extra at the beginning for an additional newline
308 // used as a sentinel. We get 4 instead so that the read buffer will be
309 // word-aligned. This seems to make the read slightly faster. We also
310 // need one character at the end also for an addional newline used as a
312 int size
= int(sb
.st_size
);
313 buffer
= new char[size
+ 4 + 1];
314 int nread
= read(fd
, buffer
+ 4, size
);
316 error("error reading `%1': %2", filename
, strerror(errno
));
317 else if (nread
!= size
)
318 error("size of `%1' decreased", filename
);
321 nread
= read(fd
, &c
, 1);
323 error("size of `%1' increased", filename
);
324 else if (memchr(buffer
+ 4, '\0', size
< 1024 ? size
: 1024) != 0)
325 error("database `%1' is a binary file", filename
);
329 bufend
= buffer
+ 4 + size
;
330 if (bufend
[-1] != '\n')
342 linear_searcher::linear_searcher(const char *query
, int query_len
,
343 const char *ign
, int trunc
)
344 : keys(0), nkeys(0), truncate_len(trunc
), ignore_fields(ign
)
346 const char *query_end
= query
+ query_len
;
349 for (p
= query
; p
< query_end
; p
++)
350 if (map
[uchar(*p
)] != '\0'
351 && (p
[1] == '\0' || map
[uchar(p
[1])] == '\0'))
355 keys
= new bmpattern
*[nk
];
358 while (p
< query_end
&& map
[uchar(*p
)] == '\0')
362 const char *start
= p
;
363 while (p
< query_end
&& map
[uchar(*p
)] != '\0')
365 keys
[nkeys
++] = new bmpattern(start
, p
- start
);
374 linear_searcher::~linear_searcher()
376 for (int i
= 0; i
< nkeys
; i
++)
381 int linear_searcher::search(const char *buffer
, const char *bufend
,
382 const char **startp
, int *lengthp
) const
384 assert(bufend
- buffer
> 0);
385 assert(buffer
[-1] == '\n');
386 assert(bufend
[-1] == '\n');
390 const char *refstart
;
391 const char *found
= search_and_check(keys
[0], buffer
, bufend
, &refstart
);
394 const char *refend
= find_end(bufend
, found
+ keys
[0]->length());
395 for (int i
= 1; i
< nkeys
; i
++)
396 if (!search_and_check(keys
[i
], refstart
, refend
))
400 *lengthp
= refend
- refstart
;
408 class linear_search_item
: public search_item
{
411 linear_search_item(const char *filename
, int fid
);
412 ~linear_search_item();
414 search_item_iterator
*make_search_item_iterator(const char *);
415 friend class linear_search_item_iterator
;
418 class linear_search_item_iterator
: public search_item_iterator
{
419 linear_search_item
*lsi
;
422 linear_search_item_iterator(linear_search_item
*, const char *query
);
423 ~linear_search_item_iterator();
424 int next(const linear_searcher
&, const char **ptr
, int *lenp
,
428 search_item
*make_linear_search_item(int fd
, const char *filename
, int fid
)
430 linear_search_item
*item
= new linear_search_item(filename
, fid
);
431 if (!item
->load(fd
)) {
439 linear_search_item::linear_search_item(const char *filename
, int fid
)
440 : search_item(filename
, fid
)
444 linear_search_item::~linear_search_item()
448 int linear_search_item::load(int fd
)
450 return fbuf
.load(fd
, name
);
453 search_item_iterator
*linear_search_item::make_search_item_iterator(
456 return new linear_search_item_iterator(this, query
);
459 linear_search_item_iterator::linear_search_item_iterator(
460 linear_search_item
*p
, const char *)
465 linear_search_item_iterator::~linear_search_item_iterator()
469 int linear_search_item_iterator::next(const linear_searcher
&searcher
,
470 const char **startp
, int *lengthp
,
473 const char *bufstart
= lsi
->fbuf
.get_start();
474 const char *bufend
= lsi
->fbuf
.get_end();
475 const char *ptr
= bufstart
+ pos
;
476 if (ptr
< bufend
&& searcher
.search(ptr
, bufend
, startp
, lengthp
)) {
477 pos
= *startp
+ *lengthp
- bufstart
;
479 *ridp
= reference_id(lsi
->filename_id
, *startp
- bufstart
);