2 /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
44 int load(int fd
, const char *filename
);
45 const char *get_start() const;
46 const char *get_end() const;
49 typedef unsigned char uchar
;
51 static uchar map
[256];
52 static uchar inv_map
[256][3];
58 static map_init the_map_init
;
63 for (i
= 0; i
< 256; i
++)
64 map
[i
] = csalnum(i
) ? cmlower(i
) : '\0';
65 for (i
= 0; i
< 256; i
++) {
68 inv_map
[i
][1] = cmupper(i
);
71 else if (csdigit(i
)) {
86 bmpattern(const char *pattern
, int pattern_length
);
88 const char *search(const char *p
, const char *end
) const;
92 bmpattern::bmpattern(const char *pattern
, int pattern_length
)
97 for (i
= 0; i
< len
; i
++)
98 pat
[i
] = map
[uchar(pattern
[i
])];
99 for (i
= 0; i
< 256; i
++)
101 for (i
= 0; i
< len
; i
++)
102 for (const unsigned char *inv
= inv_map
[uchar(pat
[i
])]; *inv
; inv
++)
103 delta
[*inv
] = len
- i
- 1;
106 const char *bmpattern::search(const char *buf
, const char *end
) const
108 int buflen
= end
- buf
;
113 strend
= end
- len
*4;
116 const char *k
= buf
+ len
- 1;
117 const int *del
= delta
;
118 const char *pattern
= pat
;
121 int t
= del
[uchar(*k
)];
128 while (k
< end
&& del
[uchar(*k
)] != 0)
137 if (map
[uchar(*--s
)] != uchar(pattern
[--j
]))
145 bmpattern::~bmpattern()
150 inline int bmpattern::length() const
156 static const char *find_end(const char *bufend
, const char *p
);
158 const char *linear_searcher::search_and_check(const bmpattern
*key
,
159 const char *buf
, const char *bufend
, const char **start
) const
161 assert(buf
[-1] == '\n');
162 assert(bufend
[-1] == '\n');
163 const char *ptr
= buf
;
165 const char *found
= key
->search(ptr
, bufend
);
168 if (check_match(buf
, bufend
, found
, key
->length(), &ptr
, start
))
174 static const char *skip_field(const char *end
, const char *p
)
178 if (p
== end
|| *p
== '%')
181 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
190 static const char *find_end(const char *bufend
, const char *p
)
197 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
207 int linear_searcher::check_match(const char *buf
, const char *bufend
,
208 const char *match
, int matchlen
,
209 const char **cont
, const char **start
) const
212 // The user is required to supply only the first truncate_len characters
213 // of the key. If truncate_len <= 0, he must supply all the key.
214 if ((truncate_len
<= 0 || matchlen
< truncate_len
)
215 && map
[uchar(match
[matchlen
])] != '\0')
218 // The character before the match must not be an alphanumeric
219 // character (unless the alphanumeric character follows one or two
220 // percent characters at the beginning of the line), nor must it be
221 // a percent character at the beginning of a line, nor a percent
222 // character following a percent character at the beginning of a
225 switch (match
- buf
) {
229 if (match
[-1] == '%' || map
[uchar(match
[-1])] != '\0')
233 if (map
[uchar(match
[-1])] != '\0' && match
[-2] != '%')
236 && (match
[-2] == '\n' || match
[-2] == '%'))
240 if (map
[uchar(match
[-1])] != '\0'
241 && !(match
[-2] == '%'
242 && (match
[-3] == '\n'
243 || (match
[-3] == '%' && match
[-4] == '\n'))))
246 && (match
[-2] == '\n'
247 || (match
[-2] == '%' && match
[-3] == '\n')))
251 const char *p
= match
;
255 if (!had_percent
&& p
[1] == '%') {
256 if (p
[2] != '\0' && strchr(ignore_fields
, p
[2]) != 0) {
257 *cont
= skip_field(bufend
, match
+ matchlen
);
270 for (q
= p
- 1; *q
== ' ' || *q
== '\t'; q
--)
284 file_buffer::file_buffer()
285 : buffer(0), bufend(0)
289 file_buffer::~file_buffer()
294 const char *file_buffer::get_start() const
296 return buffer
? buffer
+ 4 : 0;
299 const char *file_buffer::get_end() const
304 int file_buffer::load(int fd
, const char *filename
)
307 if (fstat(fd
, &sb
) < 0)
308 error("can't fstat `%1': %2", filename
, strerror(errno
));
309 else if (!S_ISREG(sb
.st_mode
))
310 error("`%1' is not a regular file", filename
);
312 // We need one character extra at the beginning for an additional newline
313 // used as a sentinel. We get 4 instead so that the read buffer will be
314 // word-aligned. This seems to make the read slightly faster. We also
315 // need one character at the end also for an additional newline used as a
317 int size
= int(sb
.st_size
);
318 buffer
= new char[size
+ 4 + 1];
319 int nread
= read(fd
, buffer
+ 4, size
);
321 error("error reading `%1': %2", filename
, strerror(errno
));
322 else if (nread
!= size
)
323 error("size of `%1' decreased", filename
);
326 nread
= read(fd
, &c
, 1);
328 error("size of `%1' increased", filename
);
329 else if (memchr(buffer
+ 4, '\0', size
< 1024 ? size
: 1024) != 0)
330 error("database `%1' is a binary file", filename
);
334 int sidx
= 4, didx
= 4;
335 for ( ; sidx
< size
+ 4; sidx
++, didx
++)
337 if (buffer
[sidx
] == '\r')
339 if (buffer
[++sidx
] != '\n')
340 buffer
[didx
++] = '\r';
345 buffer
[didx
] = buffer
[sidx
];
347 bufend
= buffer
+ 4 + size
;
348 if (bufend
[-1] != '\n')
360 linear_searcher::linear_searcher(const char *query
, int query_len
,
361 const char *ign
, int trunc
)
362 : ignore_fields(ign
), truncate_len(trunc
), keys(0), nkeys(0)
364 const char *query_end
= query
+ query_len
;
367 for (p
= query
; p
< query_end
; p
++)
368 if (map
[uchar(*p
)] != '\0'
369 && (p
[1] == '\0' || map
[uchar(p
[1])] == '\0'))
373 keys
= new bmpattern
*[nk
];
376 while (p
< query_end
&& map
[uchar(*p
)] == '\0')
380 const char *start
= p
;
381 while (p
< query_end
&& map
[uchar(*p
)] != '\0')
383 keys
[nkeys
++] = new bmpattern(start
, p
- start
);
392 linear_searcher::~linear_searcher()
394 for (int i
= 0; i
< nkeys
; i
++)
399 int linear_searcher::search(const char *buffer
, const char *bufend
,
400 const char **startp
, int *lengthp
) const
402 assert(bufend
- buffer
> 0);
403 assert(buffer
[-1] == '\n');
404 assert(bufend
[-1] == '\n');
408 const char *refstart
;
409 const char *found
= search_and_check(keys
[0], buffer
, bufend
, &refstart
);
412 const char *refend
= find_end(bufend
, found
+ keys
[0]->length());
414 for (i
= 1; i
< nkeys
; i
++)
415 if (!search_and_check(keys
[i
], refstart
, refend
))
419 *lengthp
= refend
- refstart
;
427 class linear_search_item
: public search_item
{
430 linear_search_item(const char *filename
, int fid
);
431 ~linear_search_item();
433 search_item_iterator
*make_search_item_iterator(const char *);
434 friend class linear_search_item_iterator
;
437 class linear_search_item_iterator
: public search_item_iterator
{
438 linear_search_item
*lsi
;
441 linear_search_item_iterator(linear_search_item
*, const char *query
);
442 ~linear_search_item_iterator();
443 int next(const linear_searcher
&, const char **ptr
, int *lenp
,
447 search_item
*make_linear_search_item(int fd
, const char *filename
, int fid
)
449 linear_search_item
*item
= new linear_search_item(filename
, fid
);
450 if (!item
->load(fd
)) {
458 linear_search_item::linear_search_item(const char *filename
, int fid
)
459 : search_item(filename
, fid
)
463 linear_search_item::~linear_search_item()
467 int linear_search_item::load(int fd
)
469 return fbuf
.load(fd
, name
);
472 search_item_iterator
*linear_search_item::make_search_item_iterator(
475 return new linear_search_item_iterator(this, query
);
478 linear_search_item_iterator::linear_search_item_iterator(
479 linear_search_item
*p
, const char *)
484 linear_search_item_iterator::~linear_search_item_iterator()
488 int linear_search_item_iterator::next(const linear_searcher
&searcher
,
489 const char **startp
, int *lengthp
,
492 const char *bufstart
= lsi
->fbuf
.get_start();
493 const char *bufend
= lsi
->fbuf
.get_end();
494 const char *ptr
= bufstart
+ pos
;
495 if (ptr
< bufend
&& searcher
.search(ptr
, bufend
, startp
, lengthp
)) {
496 pos
= *startp
+ *lengthp
- bufstart
;
498 *ridp
= reference_id(lsi
->filename_id
, *startp
- bufstart
);