1 /* search.c -- How to search large bodies of text. */
3 /* This file is part of GNU Info, a program for reading online documentation
6 Copyright (C) 1993, 97 Free Software Foundation, Inc.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 Written by Brian Fox (bfox@ai.mit.edu). */
29 /* The search functions take two arguments:
31 1) a string to search for, and
33 2) a pointer to a SEARCH_BINDING which contains the buffer, start,
34 and end of the search.
36 They return a long, which is the offset from the start of the buffer
37 at which the match was found. An offset of -1 indicates failure. */
39 /* A function which makes a binding with buffer and bounds. */
41 make_binding (buffer
, start
, end
)
45 SEARCH_BINDING
*binding
;
47 binding
= (SEARCH_BINDING
*)xmalloc (sizeof (SEARCH_BINDING
));
48 binding
->buffer
= buffer
;
49 binding
->start
= start
;
56 /* Make a copy of BINDING without duplicating the data. */
58 copy_binding (binding
)
59 SEARCH_BINDING
*binding
;
63 copy
= make_binding (binding
->buffer
, binding
->start
, binding
->end
);
64 copy
->flags
= binding
->flags
;
69 /* **************************************************************** */
71 /* The Actual Searching Functions */
73 /* **************************************************************** */
75 /* Search forwards or backwards for the text delimited by BINDING.
76 The search is forwards if BINDING->start is greater than BINDING->end. */
78 search (string
, binding
)
80 SEARCH_BINDING
*binding
;
84 /* If the search is backwards, then search backwards, otherwise forwards. */
85 if (binding
->start
> binding
->end
)
86 result
= search_backward (string
, binding
);
88 result
= search_forward (string
, binding
);
93 /* Search forwards for STRING through the text delimited in BINDING. */
95 search_forward (string
, binding
)
97 SEARCH_BINDING
*binding
;
99 register int c
, i
, len
;
100 register char *buff
, *end
;
101 char *alternate
= (char *)NULL
;
103 len
= strlen (string
);
105 /* We match characters in the search buffer against STRING and ALTERNATE.
106 ALTERNATE is a case reversed version of STRING; this is cheaper than
107 case folding each character before comparison. Alternate is only
108 used if the case folding bit is turned on in the passed BINDING. */
110 if (binding
->flags
& S_FoldCase
)
112 alternate
= xstrdup (string
);
114 for (i
= 0; i
< len
; i
++)
116 if (islower (alternate
[i
]))
117 alternate
[i
] = toupper (alternate
[i
]);
118 else if (isupper (alternate
[i
]))
119 alternate
[i
] = tolower (alternate
[i
]);
123 buff
= binding
->buffer
+ binding
->start
;
124 end
= binding
->buffer
+ binding
->end
+ 1;
126 while (buff
< (end
- len
))
128 for (i
= 0; i
< len
; i
++)
132 if ((c
!= string
[i
]) && (!alternate
|| c
!= alternate
[i
]))
140 if (binding
->flags
& S_SkipDest
)
142 return ((long) (buff
- binding
->buffer
));
154 /* Search for STRING backwards through the text delimited in BINDING. */
156 search_backward (input_string
, binding
)
158 SEARCH_BINDING
*binding
;
160 register int c
, i
, len
;
161 register char *buff
, *end
;
163 char *alternate
= (char *)NULL
;
165 len
= strlen (input_string
);
167 /* Reverse the characters in the search string. */
168 string
= (char *)xmalloc (1 + len
);
169 for (c
= 0, i
= len
- 1; input_string
[c
]; c
++, i
--)
170 string
[i
] = input_string
[c
];
174 /* We match characters in the search buffer against STRING and ALTERNATE.
175 ALTERNATE is a case reversed version of STRING; this is cheaper than
176 case folding each character before comparison. ALTERNATE is only
177 used if the case folding bit is turned on in the passed BINDING. */
179 if (binding
->flags
& S_FoldCase
)
181 alternate
= xstrdup (string
);
183 for (i
= 0; i
< len
; i
++)
185 if (islower (alternate
[i
]))
186 alternate
[i
] = toupper (alternate
[i
]);
187 else if (isupper (alternate
[i
]))
188 alternate
[i
] = tolower (alternate
[i
]);
192 buff
= binding
->buffer
+ binding
->start
- 1;
193 end
= binding
->buffer
+ binding
->end
;
195 while (buff
> (end
+ len
))
197 for (i
= 0; i
< len
; i
++)
201 if (c
!= string
[i
] && (alternate
&& c
!= alternate
[i
]))
211 if (binding
->flags
& S_SkipDest
)
213 return ((long) (1 + (buff
- binding
->buffer
)));
226 /* Find STRING in LINE, returning the offset of the end of the string.
227 Return an offset of -1 if STRING does not appear in LINE. The search
228 is bound by the end of the line (i.e., either NEWLINE or 0). */
230 string_in_line (string
, line
)
234 SEARCH_BINDING binding
;
236 /* Find the end of the line. */
237 for (end
= 0; line
[end
] && line
[end
] != '\n'; end
++);
239 /* Search for STRING within these confines. */
240 binding
.buffer
= line
;
243 binding
.flags
= S_FoldCase
| S_SkipDest
;
245 return (search_forward (string
, &binding
));
248 /* Return non-zero if STRING is the first text to appear at BINDING. */
250 looking_at (string
, binding
)
252 SEARCH_BINDING
*binding
;
256 search_end
= search (string
, binding
);
258 /* If the string was not found, SEARCH_END is -1. If the string was found,
259 but not right away, SEARCH_END is != binding->start. Otherwise, the
260 string was found at binding->start. */
261 return (search_end
== binding
->start
);
264 /* **************************************************************** */
266 /* Small String Searches */
268 /* **************************************************************** */
270 /* Function names that start with "skip" are passed a string, and return
271 an offset from the start of that string. Function names that start
272 with "find" are passed a SEARCH_BINDING, and return an absolute position
273 marker of the item being searched for. "Find" functions return a value
274 of -1 if the item being looked for couldn't be found. */
276 /* Return the index of the first non-whitespace character in STRING. */
278 skip_whitespace (string
)
283 for (i
= 0; string
&& whitespace (string
[i
]); i
++);
287 /* Return the index of the first non-whitespace or newline character in
290 skip_whitespace_and_newlines (string
)
295 for (i
= 0; string
&& (whitespace (string
[i
]) || string
[i
] == '\n'); i
++);
299 /* Return the index of the first whitespace character in STRING. */
301 skip_non_whitespace (string
)
306 for (i
= 0; string
&& !whitespace (string
[i
]); i
++);
310 /* Return the index of the first non-node character in STRING. Note that
311 this function contains quite a bit of hair to ignore periods in some
312 special cases. This is because we here at GNU ship some info files which
313 contain nodenames that contain periods. No such nodename can start with
314 a period, or continue with whitespace, newline, or ')' immediately following
315 the period. If second argument NEWLINES_OKAY is non-zero, newlines should
316 be skipped while parsing out the nodename specification. */
318 skip_node_characters (string
, newlines_okay
)
322 register int c
, i
= 0;
326 /* Handle special case. This is when another function has parsed out the
327 filename component of the node name, and we just want to parse out the
328 nodename proper. In that case, a period at the start of the nodename
329 indicates an empty nodename. */
330 if (string
&& *string
== '.')
333 if (string
&& *string
== '(')
340 for (; string
&& (c
= string
[i
]); i
++)
352 /* If the character following the close paren is a space or period,
353 then this node name has no more characters associated with it. */
357 ((!newlines_okay
) && (c
== '\n')) ||
358 ((paren_seen
&& string
[i
- 1] == ')') &&
359 (c
== ' ' || c
== '.')) ||
363 /* This test causes a node name ending in a period, like `This.', not to
364 be found. The trailing . is stripped. This occurs in the jargon
365 file (`I see no X here.' is a node name). */
368 (whitespace_or_newline (string
[i
+ 1])) ||
369 (string
[i
+ 1] == ')'))))
376 /* **************************************************************** */
378 /* Searching FILE_BUFFER's */
380 /* **************************************************************** */
382 /* Return the absolute position of the first occurence of a node separator in
383 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node
384 separator was found. */
386 find_node_separator (binding
)
387 SEARCH_BINDING
*binding
;
392 body
= binding
->buffer
;
394 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are
395 optional, but the DELETE and NEWLINE are not. This separator holds
396 true for all separated elements in an Info file, including the tags
397 table (if present) and the indirect tags table (if present). */
398 for (i
= binding
->start
; i
< binding
->end
- 1; i
++)
399 if (((body
[i
] == INFO_FF
&& body
[i
+ 1] == INFO_COOKIE
) &&
400 (body
[i
+ 2] == '\n' ||
401 (body
[i
+ 2] == INFO_FF
&& body
[i
+ 3] == '\n'))) ||
402 ((body
[i
] == INFO_COOKIE
) &&
403 (body
[i
+ 1] == '\n' ||
404 (body
[i
+ 1] == INFO_FF
&& body
[i
+ 2] == '\n'))))
409 /* Return the length of the node separator characters that BODY is
410 currently pointing at. */
412 skip_node_separator (body
)
419 if (body
[i
] == INFO_FF
)
422 if (body
[i
++] != INFO_COOKIE
)
425 if (body
[i
] == INFO_FF
)
428 if (body
[i
++] != '\n')
434 /* Return the number of characters from STRING to the start of
442 for (i
= 0; string
&& string
[i
] && string
[i
] != '\n'; i
++);
444 if (string
[i
] == '\n')
450 /* Return the absolute position of the beginning of a tags table in this
451 binding starting the search at binding->start. */
453 find_tags_table (binding
)
454 SEARCH_BINDING
*binding
;
456 SEARCH_BINDING search
;
459 search
.buffer
= binding
->buffer
;
460 search
.start
= binding
->start
;
461 search
.end
= binding
->end
;
462 search
.flags
= S_FoldCase
;
464 while ((position
= find_node_separator (&search
)) != -1 )
466 search
.start
= position
;
467 search
.start
+= skip_node_separator (search
.buffer
+ search
.start
);
469 if (looking_at (TAGS_TABLE_BEG_LABEL
, &search
))
475 /* Return the absolute position of the node named NODENAME in BINDING.
476 This is a brute force search, and we wish to avoid it when possible.
477 This function is called when a tag (indirect or otherwise) doesn't
478 really point to the right node. It returns the absolute position of
479 the separator preceding the node. */
481 find_node_in_binding (nodename
, binding
)
483 SEARCH_BINDING
*binding
;
487 SEARCH_BINDING search
;
489 namelen
= strlen (nodename
);
491 search
.buffer
= binding
->buffer
;
492 search
.start
= binding
->start
;
493 search
.end
= binding
->end
;
496 while ((position
= find_node_separator (&search
)) != -1)
498 search
.start
= position
;
499 search
.start
+= skip_node_separator (search
.buffer
+ search
.start
);
501 offset
= string_in_line (INFO_NODE_LABEL
, search
.buffer
+ search
.start
);
506 search
.start
+= offset
;
507 search
.start
+= skip_whitespace (search
.buffer
+ search
.start
);
508 offset
= skip_node_characters
509 (search
.buffer
+ search
.start
, DONT_SKIP_NEWLINES
);
511 /* Notice that this is an exact match. You cannot grovel through
512 the buffer with this function looking for random nodes. */
513 if ((offset
== namelen
) &&
514 (search
.buffer
[search
.start
] == nodename
[0]) &&
515 (strncmp (search
.buffer
+ search
.start
, nodename
, offset
) == 0))