2 * Copyright (c) 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
4 * Copyright (C) 1989 - 1992, 2000, 2001
5 * Free Software Foundation, Inc.
6 * Written by James Clark (jjc@jclark.com)
8 * This is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2, or (at your option) any later
13 * This is distributed in the hope that it will be useful, but WITHOUT ANY
14 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * You should have received a copy of the GNU General Public License along
19 * with groff; see the file COPYING. If not, write to the Free Software
20 * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
48 int load(int fd
, const char *filename
);
49 const char *get_start() const;
50 const char *get_end() const;
53 typedef unsigned char uchar
;
55 static uchar map
[256];
56 static uchar inv_map
[256][3];
58 class map_init
// FIXME static initializer ctor
64 static map_init the_map_init
;
69 for (i
= 0; i
< 256; i
++)
70 map
[i
] = csalnum(i
) ? cmlower(i
) : '\0';
71 for (i
= 0; i
< 256; i
++) {
74 inv_map
[i
][1] = cmupper(i
);
77 else if (csdigit(i
)) {
93 bmpattern(const char *pattern
, int pattern_length
);
95 const char *search(const char *p
, const char *end
) const;
99 bmpattern::bmpattern(const char *pattern
, int pattern_length
)
100 : len(pattern_length
)
104 for (i
= 0; i
< len
; i
++)
105 pat
[i
] = map
[uchar(pattern
[i
])];
106 for (i
= 0; i
< 256; i
++)
108 for (i
= 0; i
< len
; i
++)
109 for (const unsigned char *inv
= inv_map
[uchar(pat
[i
])]; *inv
; inv
++)
110 delta
[*inv
] = len
- i
- 1;
113 const char *bmpattern::search(const char *buf
, const char *end
) const
115 int buflen
= end
- buf
;
120 strend
= end
- len
*4;
123 const char *k
= buf
+ len
- 1;
124 const int *del
= delta
;
125 const char *pattern
= pat
;
128 int t
= del
[uchar(*k
)];
135 while (k
< end
&& del
[uchar(*k
)] != 0)
144 if (map
[uchar(*--s
)] != uchar(pattern
[--j
]))
152 bmpattern::~bmpattern()
157 inline int bmpattern::length() const
162 static const char *find_end(const char *bufend
, const char *p
);
164 const char *linear_searcher::search_and_check(const bmpattern
*key
,
165 const char *buf
, const char *bufend
, const char **start
) const
167 assert(buf
[-1] == '\n');
168 assert(bufend
[-1] == '\n');
169 const char *ptr
= buf
;
171 const char *found
= key
->search(ptr
, bufend
);
174 if (check_match(buf
, bufend
, found
, key
->length(), &ptr
, start
))
180 static const char *skip_field(const char *end
, const char *p
)
184 if (p
== end
|| *p
== '%')
187 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
196 static const char *find_end(const char *bufend
, const char *p
)
203 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
212 int linear_searcher::check_match(const char *buf
, const char *bufend
,
213 const char *match
, int matchlen
,
214 const char **cont
, const char **start
) const
217 // The user is required to supply only the first truncate_len characters
218 // of the key. If truncate_len <= 0, he must supply all the key.
219 if ((truncate_len
<= 0 || matchlen
< truncate_len
)
220 && map
[uchar(match
[matchlen
])] != '\0')
223 // The character before the match must not be an alphanumeric
224 // character (unless the alphanumeric character follows one or two
225 // percent characters at the beginning of the line), nor must it be
226 // a percent character at the beginning of a line, nor a percent
227 // character following a percent character at the beginning of a
230 switch (match
- buf
) {
234 if (match
[-1] == '%' || map
[uchar(match
[-1])] != '\0')
238 if (map
[uchar(match
[-1])] != '\0' && match
[-2] != '%')
241 && (match
[-2] == '\n' || match
[-2] == '%'))
245 if (map
[uchar(match
[-1])] != '\0'
246 && !(match
[-2] == '%'
247 && (match
[-3] == '\n'
248 || (match
[-3] == '%' && match
[-4] == '\n'))))
251 && (match
[-2] == '\n'
252 || (match
[-2] == '%' && match
[-3] == '\n')))
256 const char *p
= match
;
260 if (!had_percent
&& p
[1] == '%') {
261 if (p
[2] != '\0' && strchr(ignore_fields
, p
[2]) != 0) {
262 *cont
= skip_field(bufend
, match
+ matchlen
);
275 for (q
= p
- 1; *q
== ' ' || *q
== '\t'; q
--)
289 file_buffer::file_buffer()
290 : buffer(0), bufend(0)
294 file_buffer::~file_buffer()
299 const char *file_buffer::get_start() const
301 return buffer
? buffer
+ 4 : 0;
304 const char *file_buffer::get_end() const
309 int file_buffer::load(int fd
, const char *filename
)
312 if (fstat(fd
, &sb
) < 0)
313 error("can't fstat `%1': %2", filename
, strerror(errno
));
314 else if (!S_ISREG(sb
.st_mode
))
315 error("`%1' is not a regular file", filename
);
317 // We need one character extra at the beginning for an additional newline
318 // used as a sentinel. We get 4 instead so that the read buffer will be
319 // word-aligned. This seems to make the read slightly faster. We also
320 // need one character at the end also for an additional newline used as a
322 int size
= int(sb
.st_size
);
323 buffer
= new char[size
+ 4 + 1];
324 int nread
= read(fd
, buffer
+ 4, size
);
326 error("error reading `%1': %2", filename
, strerror(errno
));
327 else if (nread
!= size
)
328 error("size of `%1' decreased", filename
);
331 nread
= read(fd
, &c
, 1);
333 error("size of `%1' increased", filename
);
334 else if (memchr(buffer
+ 4, '\0', size
< 1024 ? size
: 1024) != 0)
335 error("database `%1' is a binary file", filename
);
339 int sidx
= 4, didx
= 4;
340 for ( ; sidx
< size
+ 4; sidx
++, didx
++)
342 if (buffer
[sidx
] == '\r')
344 if (buffer
[++sidx
] != '\n')
345 buffer
[didx
++] = '\r';
350 buffer
[didx
] = buffer
[sidx
];
352 bufend
= buffer
+ 4 + size
;
353 if (bufend
[-1] != '\n')
365 linear_searcher::linear_searcher(const char *query
, int query_len
,
366 const char *ign
, int trunc
)
367 : ignore_fields(ign
), truncate_len(trunc
), keys(0), nkeys(0)
369 const char *query_end
= query
+ query_len
;
372 for (p
= query
; p
< query_end
; p
++)
373 if (map
[uchar(*p
)] != '\0'
374 && (p
[1] == '\0' || map
[uchar(p
[1])] == '\0'))
378 keys
= new bmpattern
*[nk
];
381 while (p
< query_end
&& map
[uchar(*p
)] == '\0')
385 const char *start
= p
;
386 while (p
< query_end
&& map
[uchar(*p
)] != '\0')
388 keys
[nkeys
++] = new bmpattern(start
, p
- start
);
397 linear_searcher::~linear_searcher()
399 for (int i
= 0; i
< nkeys
; i
++)
404 int linear_searcher::search(const char *buffer
, const char *bufend
,
405 const char **startp
, int *lengthp
) const
407 assert(bufend
- buffer
> 0);
408 assert(buffer
[-1] == '\n');
409 assert(bufend
[-1] == '\n');
413 const char *refstart
;
414 const char *found
= search_and_check(keys
[0], buffer
, bufend
, &refstart
);
417 const char *refend
= find_end(bufend
, found
+ keys
[0]->length());
419 for (i
= 1; i
< nkeys
; i
++)
420 if (!search_and_check(keys
[i
], refstart
, refend
))
424 *lengthp
= refend
- refstart
;
432 class linear_search_item
438 linear_search_item(const char *filename
, int fid
);
439 ~linear_search_item();
441 search_item_iterator
*make_search_item_iterator(const char *);
442 friend class linear_search_item_iterator
;
445 class linear_search_item_iterator
446 : public search_item_iterator
448 linear_search_item
*lsi
;
452 linear_search_item_iterator(linear_search_item
*, const char *query
);
453 ~linear_search_item_iterator();
454 int next(const linear_searcher
&, const char **ptr
, int *lenp
,
458 search_item
*make_linear_search_item(int fd
, const char *filename
, int fid
)
460 linear_search_item
*item
= new linear_search_item(filename
, fid
);
461 if (!item
->load(fd
)) {
469 linear_search_item::linear_search_item(const char *filename
, int fid
)
470 : search_item(filename
, fid
)
474 linear_search_item::~linear_search_item()
478 int linear_search_item::load(int fd
)
480 return fbuf
.load(fd
, name
);
483 search_item_iterator
*linear_search_item::make_search_item_iterator(
486 return new linear_search_item_iterator(this, query
);
489 linear_search_item_iterator::linear_search_item_iterator(
490 linear_search_item
*p
, const char *)
495 linear_search_item_iterator::~linear_search_item_iterator()
499 int linear_search_item_iterator::next(const linear_searcher
&searcher
,
500 const char **startp
, int *lengthp
,
503 const char *bufstart
= lsi
->fbuf
.get_start();
504 const char *bufend
= lsi
->fbuf
.get_end();
505 const char *ptr
= bufstart
+ pos
;
506 if (ptr
< bufend
&& searcher
.search(ptr
, bufend
, startp
, lengthp
)) {
507 pos
= *startp
+ *lengthp
- bufstart
;
509 *ridp
= reference_id(lsi
->filename_id
, *startp
- bufstart
);