2 /* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Written by James Clark (jjc@jclark.com)
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License along
18 with groff; see the file COPYING. If not, write to the Free Software
19 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
43 int load(int fd
, const char *filename
);
44 const char *get_start() const;
45 const char *get_end() const;
48 typedef unsigned char uchar
;
50 static uchar map
[256];
51 static uchar inv_map
[256][3];
57 static map_init the_map_init
;
62 for (i
= 0; i
< 256; i
++)
63 map
[i
] = csalnum(i
) ? cmlower(i
) : '\0';
64 for (i
= 0; i
< 256; i
++) {
67 inv_map
[i
][1] = cmupper(i
);
70 else if (csdigit(i
)) {
85 bmpattern(const char *pattern
, int pattern_length
);
87 const char *search(const char *p
, const char *end
) const;
91 bmpattern::bmpattern(const char *pattern
, int pattern_length
)
96 for (i
= 0; i
< len
; i
++)
97 pat
[i
] = map
[uchar(pattern
[i
])];
98 for (i
= 0; i
< 256; i
++)
100 for (i
= 0; i
< len
; i
++)
101 for (const unsigned char *inv
= inv_map
[uchar(pat
[i
])]; *inv
; inv
++)
102 delta
[*inv
] = len
- i
- 1;
105 const char *bmpattern::search(const char *buf
, const char *end
) const
107 int buflen
= end
- buf
;
112 strend
= end
- len
*4;
115 const char *k
= buf
+ len
- 1;
116 const int *del
= delta
;
117 const char *pattern
= pat
;
120 int t
= del
[uchar(*k
)];
127 while (k
< end
&& del
[uchar(*k
)] != 0)
136 if (map
[uchar(*--s
)] != uchar(pattern
[--j
]))
144 bmpattern::~bmpattern()
149 inline int bmpattern::length() const
155 static const char *find_end(const char *bufend
, const char *p
);
157 const char *linear_searcher::search_and_check(const bmpattern
*key
,
158 const char *buf
, const char *bufend
, const char **start
) const
160 assert(buf
[-1] == '\n');
161 assert(bufend
[-1] == '\n');
162 const char *ptr
= buf
;
164 const char *found
= key
->search(ptr
, bufend
);
167 if (check_match(buf
, bufend
, found
, key
->length(), &ptr
, start
))
173 static const char *skip_field(const char *end
, const char *p
)
177 if (p
== end
|| *p
== '%')
180 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
189 static const char *find_end(const char *bufend
, const char *p
)
196 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
206 int linear_searcher::check_match(const char *buf
, const char *bufend
,
207 const char *match
, int matchlen
,
208 const char **cont
, const char **start
) const
211 // The user is required to supply only the first truncate_len characters
212 // of the key. If truncate_len <= 0, he must supply all the key.
213 if ((truncate_len
<= 0 || matchlen
< truncate_len
)
214 && map
[uchar(match
[matchlen
])] != '\0')
217 // The character before the match must not be an alphanumeric
218 // character (unless the alphanumeric character follows one or two
219 // percent characters at the beginning of the line), nor must it be
220 // a percent character at the beginning of a line, nor a percent
221 // character following a percent character at the beginning of a
224 switch (match
- buf
) {
228 if (match
[-1] == '%' || map
[uchar(match
[-1])] != '\0')
232 if (map
[uchar(match
[-1])] != '\0' && match
[-2] != '%')
235 && (match
[-2] == '\n' || match
[-2] == '%'))
239 if (map
[uchar(match
[-1])] != '\0'
240 && !(match
[-2] == '%'
241 && (match
[-3] == '\n'
242 || (match
[-3] == '%' && match
[-4] == '\n'))))
245 && (match
[-2] == '\n'
246 || (match
[-2] == '%' && match
[-3] == '\n')))
250 const char *p
= match
;
254 if (!had_percent
&& p
[1] == '%') {
255 if (p
[2] != '\0' && strchr(ignore_fields
, p
[2]) != 0) {
256 *cont
= skip_field(bufend
, match
+ matchlen
);
269 for (q
= p
- 1; *q
== ' ' || *q
== '\t'; q
--)
283 file_buffer::file_buffer()
284 : buffer(0), bufend(0)
288 file_buffer::~file_buffer()
293 const char *file_buffer::get_start() const
295 return buffer
? buffer
+ 4 : 0;
298 const char *file_buffer::get_end() const
303 int file_buffer::load(int fd
, const char *filename
)
306 if (fstat(fd
, &sb
) < 0)
307 error("can't fstat `%1': %2", filename
, strerror(errno
));
308 else if (!S_ISREG(sb
.st_mode
))
309 error("`%1' is not a regular file", filename
);
311 // We need one character extra at the beginning for an additional newline
312 // used as a sentinel. We get 4 instead so that the read buffer will be
313 // word-aligned. This seems to make the read slightly faster. We also
314 // need one character at the end also for an additional newline used as a
316 int size
= int(sb
.st_size
);
317 buffer
= new char[size
+ 4 + 1];
318 int nread
= read(fd
, buffer
+ 4, size
);
320 error("error reading `%1': %2", filename
, strerror(errno
));
321 else if (nread
!= size
)
322 error("size of `%1' decreased", filename
);
325 nread
= read(fd
, &c
, 1);
327 error("size of `%1' increased", filename
);
328 else if (memchr(buffer
+ 4, '\0', size
< 1024 ? size
: 1024) != 0)
329 error("database `%1' is a binary file", filename
);
333 bufend
= buffer
+ 4 + size
;
334 if (bufend
[-1] != '\n')
346 linear_searcher::linear_searcher(const char *query
, int query_len
,
347 const char *ign
, int trunc
)
348 : keys(0), nkeys(0), truncate_len(trunc
), ignore_fields(ign
)
350 const char *query_end
= query
+ query_len
;
353 for (p
= query
; p
< query_end
; p
++)
354 if (map
[uchar(*p
)] != '\0'
355 && (p
[1] == '\0' || map
[uchar(p
[1])] == '\0'))
359 keys
= new bmpattern
*[nk
];
362 while (p
< query_end
&& map
[uchar(*p
)] == '\0')
366 const char *start
= p
;
367 while (p
< query_end
&& map
[uchar(*p
)] != '\0')
369 keys
[nkeys
++] = new bmpattern(start
, p
- start
);
378 linear_searcher::~linear_searcher()
380 for (int i
= 0; i
< nkeys
; i
++)
385 int linear_searcher::search(const char *buffer
, const char *bufend
,
386 const char **startp
, int *lengthp
) const
388 assert(bufend
- buffer
> 0);
389 assert(buffer
[-1] == '\n');
390 assert(bufend
[-1] == '\n');
394 const char *refstart
;
395 const char *found
= search_and_check(keys
[0], buffer
, bufend
, &refstart
);
398 const char *refend
= find_end(bufend
, found
+ keys
[0]->length());
400 for (i
= 1; i
< nkeys
; i
++)
401 if (!search_and_check(keys
[i
], refstart
, refend
))
405 *lengthp
= refend
- refstart
;
413 class linear_search_item
: public search_item
{
416 linear_search_item(const char *filename
, int fid
);
417 ~linear_search_item();
419 search_item_iterator
*make_search_item_iterator(const char *);
420 friend class linear_search_item_iterator
;
423 class linear_search_item_iterator
: public search_item_iterator
{
424 linear_search_item
*lsi
;
427 linear_search_item_iterator(linear_search_item
*, const char *query
);
428 ~linear_search_item_iterator();
429 int next(const linear_searcher
&, const char **ptr
, int *lenp
,
433 search_item
*make_linear_search_item(int fd
, const char *filename
, int fid
)
435 linear_search_item
*item
= new linear_search_item(filename
, fid
);
436 if (!item
->load(fd
)) {
444 linear_search_item::linear_search_item(const char *filename
, int fid
)
445 : search_item(filename
, fid
)
449 linear_search_item::~linear_search_item()
453 int linear_search_item::load(int fd
)
455 return fbuf
.load(fd
, name
);
458 search_item_iterator
*linear_search_item::make_search_item_iterator(
461 return new linear_search_item_iterator(this, query
);
464 linear_search_item_iterator::linear_search_item_iterator(
465 linear_search_item
*p
, const char *)
470 linear_search_item_iterator::~linear_search_item_iterator()
474 int linear_search_item_iterator::next(const linear_searcher
&searcher
,
475 const char **startp
, int *lengthp
,
478 const char *bufstart
= lsi
->fbuf
.get_start();
479 const char *bufend
= lsi
->fbuf
.get_end();
480 const char *ptr
= bufstart
+ pos
;
481 if (ptr
< bufend
&& searcher
.search(ptr
, bufend
, startp
, lengthp
)) {
482 pos
= *startp
+ *lengthp
- bufstart
;
484 *ridp
= reference_id(lsi
->filename_id
, *startp
- bufstart
);