2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Christos Zoulas of Cornell University.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)tokenizer.c 8.1 (Berkeley) 6/4/93
33 * $NetBSD: tokenizer.c,v 1.14 2003/12/05 13:37:48 lukem Exp $
34 * $DragonFly: src/lib/libedit/tokenizer.c,v 1.5 2005/11/13 11:58:30 corecode Exp $
40 * tokenizer.c: Bourne shell-like tokenizer
/*
 * Quoting states stored in the quote field of the tokenizer.
 * Going by the case comments in tok_line: Q_none is unquoted text,
 * Q_single and Q_double are single and double quote mode, and
 * Q_doubleone is a one-character quote taken while in double quote mode
 * (it always falls back to Q_double). Q_one is presumably the same
 * one-character quote outside of quotes; confirm against the full source.
 */
47 Q_none
, Q_single
, Q_double
, Q_one
, Q_doubleone
/*
 * Allocation wrappers used throughout this file. Each one expands
 * directly to the C library routine of the same name, so the allocator
 * can be swapped in one place.
 */
58 #define tok_strdup(a) strdup(a)
59 #define tok_malloc(a) malloc(a)
60 #define tok_free(a) free(a)
61 #define tok_realloc(a, b) realloc(a, b)
/* Tokenizer state; every field below is reached through a Tokenizer pointer. */
65 char *ifs
; /* Field separator characters (private copy made by tok_init) */
66 int argc
, amax
; /* Current and maximum number of args */
67 char **argv
; /* Argument list; entries point into the word buffer */
68 char *wptr
, *wmax
; /* Current write position and end limit of the word buffer */
69 char *wstart
; /* Beginning of next word */
70 char *wspace
; /* Start of the allocated word buffer */
71 quote_t quote
; /* Quoting state */
72 int flags
; /* Bit flags; TOK_KEEP and TOK_EAT are the bits used in this file */
/*
 * Forward declaration of the word-finishing helper.
 * NOTE(review): private presumably expands to static; confirm in the
 * project headers.
 */
76 private void tok_finish(Tokenizer
*);
80 * Finish a word in the tokenizer.
/*
 * tok_finish: close the word currently being collected.
 * tok: tokenizer whose word buffer and argument list are updated.
 * NOTE(review): braces and some statements of this function are not
 * visible in this listing; the notes cover only what is shown.
 */
83 tok_finish(Tokenizer
*tok
)
/*
 * A word is recorded when it is non-empty (wptr has moved past wstart)
 * or when TOK_KEEP asks for it to be kept regardless.
 */
87 if ((tok
->flags
& TOK_KEEP
) || tok
->wptr
!= tok
->wstart
) {
/* Append the word start to argv and keep the list NULL terminated. */
88 tok
->argv
[tok
->argc
++] = tok
->wstart
;
89 tok
->argv
[tok
->argc
] = NULL
;
/* The next word begins one position past the current write pointer. */
90 tok
->wstart
= ++tok
->wptr
;
/* TOK_KEEP is cleared here so it does not carry over. */
92 tok
->flags
&= ~TOK_KEEP
;
97 * Initialize the tokenizer
/*
 * tok_init: allocate a Tokenizer together with its separator copy,
 * argument vector and word buffer.
 * ifs: separator characters to copy; when NULL the IFS default is used.
 * Each failure path shown releases everything allocated before it.
 * NOTE(review): the return statements and the initial values of argc,
 * amax, flags and quote are not visible in this listing.
 */
100 tok_init(const char *ifs
)
102 Tokenizer
*tok
= (Tokenizer
*) tok_malloc(sizeof(Tokenizer
));
/*
 * NOTE(review): no NULL check of tok is visible between the allocation
 * above and the dereference below; confirm that one exists.
 */
106 tok
->ifs
= tok_strdup(ifs
? ifs
: IFS
);
107 if (tok
->ifs
== NULL
) {
108 tok_free((ptr_t
)tok
);
/* Argument vector with room for amax pointers. */
113 tok
->argv
= (char **) tok_malloc(sizeof(char *) * tok
->amax
);
114 if (tok
->argv
== NULL
) {
115 tok_free((ptr_t
)tok
->ifs
);
116 tok_free((ptr_t
)tok
);
/* Word buffer of WINCR bytes. */
120 tok
->wspace
= (char *) tok_malloc(WINCR
);
121 if (tok
->wspace
== NULL
) {
122 tok_free((ptr_t
)tok
->argv
);
123 tok_free((ptr_t
)tok
->ifs
);
124 tok_free((ptr_t
)tok
);
/* wmax marks the end of the buffer; wstart and wptr begin at its start. */
127 tok
->wmax
= tok
->wspace
+ WINCR
;
128 tok
->wstart
= tok
->wspace
;
129 tok
->wptr
= tok
->wspace
;
138 * Reset the tokenizer
/*
 * tok_reset: rewind the tokenizer so that the word buffer is reused.
 * tok: tokenizer to rewind; its allocations are kept.
 * NOTE(review): only the pointer rewinds are visible in this listing;
 * any reset of argc, flags or quote is not shown.
 */
141 tok_reset(Tokenizer
*tok
)
/* Both the word start and the write position return to the buffer start. */
145 tok
->wstart
= tok
->wspace
;
146 tok
->wptr
= tok
->wspace
;
/*
 * tok_end: release a tokenizer.
 * tok: tokenizer to destroy; the pointer must not be used afterwards.
 * Frees the separator copy, the word buffer and the argument vector,
 * then the Tokenizer itself last because the other pointers live in it.
 */
156 tok_end(Tokenizer
*tok
)
159 tok_free((ptr_t
) tok
->ifs
);
160 tok_free((ptr_t
) tok
->wspace
);
161 tok_free((ptr_t
) tok
->argv
);
162 tok_free((ptr_t
) tok
);
168 * Bourne shell (sh(1)) like tokenizing
170 * tok current tokenizer state (setup with tok_init())
175 * 2 Unmatched double quote
176 * 1 Unmatched single quote
178 * Modifies (if return value is 0):
179 * argc number of arguments
180 * argv argument array
181 * cursorc if !NULL, argv element containing cursor
182 * cursoro if !NULL, offset in argv[cursorc] of cursor
/*
 * tok_line: walk line->buffer one character at a time and split it into
 * words stored in the tokenizer, tracking quote state in tok->quote.
 * tok: tokenizer state. line: buffer, cursor and lastchar pointers.
 * argv: receives tok->argv on the way out.
 * cursorc, cursoro: optional cursor outputs (tok_str passes NULL for both).
 * NOTE(review): many statements of this function are not visible in this
 * listing (return paths, the case labels for the scanned characters, and
 * several assignments), so the notes below cover only what is shown.
 */
185 tok_line(Tokenizer
*tok
, const LineInfo
*line
,
186 int *argc
, const char ***argv
, int *cursorc
, int *cursoro
)
193 for (ptr
= line
->buffer
; ;ptr
++) {
/* The loop has no condition of its own; this test presumably ends the scan. */
194 if (ptr
>= line
->lastchar
)
/* At the cursor position: remember how far into the current word it sits. */
196 if (ptr
== line
->cursor
) {
198 co
= tok
->wptr
- tok
->wstart
;
202 tok
->flags
|= TOK_KEEP
;
203 tok
->flags
&= ~TOK_EAT
;
204 switch (tok
->quote
) {
206 tok
->quote
= Q_single
; /* Enter single quote
210 case Q_single
: /* Exit single quote mode */
214 case Q_one
: /* Quote this ' */
219 case Q_double
: /* Stay in double quote mode */
223 case Q_doubleone
: /* Quote this ' */
224 tok
->quote
= Q_double
;
234 tok
->flags
&= ~TOK_EAT
;
235 tok
->flags
|= TOK_KEEP
;
236 switch (tok
->quote
) {
237 case Q_none
: /* Enter double quote mode */
238 tok
->quote
= Q_double
;
241 case Q_double
: /* Exit double quote mode */
245 case Q_one
: /* Quote this " */
250 case Q_single
: /* Stay in single quote mode */
254 case Q_doubleone
: /* Quote this " */
255 tok
->quote
= Q_double
;
265 tok
->flags
|= TOK_KEEP
;
266 tok
->flags
&= ~TOK_EAT
;
267 switch (tok
->quote
) {
268 case Q_none
: /* Quote next character */
272 case Q_double
: /* Quote next character */
273 tok
->quote
= Q_doubleone
;
276 case Q_one
: /* Quote this, restore state */
281 case Q_single
: /* Stay in single quote mode */
285 case Q_doubleone
: /* Quote this \ */
286 tok
->quote
= Q_double
;
296 tok
->flags
&= ~TOK_EAT
;
297 switch (tok
->quote
) {
303 *tok
->wptr
++ = *ptr
; /* Add the return */
306 case Q_doubleone
: /* Back to double, eat the '\n' */
307 tok
->flags
|= TOK_EAT
;
308 tok
->quote
= Q_double
;
311 case Q_one
: /* No quote, more eat the '\n' */
312 tok
->flags
|= TOK_EAT
;
322 switch (tok
->quote
) {
324 /* Finish word and return */
325 if (tok
->flags
& TOK_EAT
) {
326 tok
->flags
&= ~TOK_EAT
;
338 tok
->quote
= Q_double
;
353 tok
->flags
&= ~TOK_EAT
;
354 switch (tok
->quote
) {
356 if (strchr(tok
->ifs
, *ptr
) != NULL
)
370 tok
->quote
= Q_double
;
/*
 * Word buffer nearly full (write position within 4 bytes of wmax): grow
 * it by WINCR. When the reallocation returns a different block, the
 * rebased addresses of the argv entries, wptr and wstart are computed
 * from their offsets against the old base.
 * NOTE(review): the rebasing subtracts the old tok->wspace pointer after
 * tok_realloc may already have released that block; saving the offsets
 * before the call would avoid relying on the stale pointer value.
 */
386 if (tok
->wptr
>= tok
->wmax
- 4) {
387 size_t size
= tok
->wmax
- tok
->wspace
+ WINCR
;
388 char *s
= (char *) tok_realloc(tok
->wspace
, size
);
392 if (s
!= tok
->wspace
) {
394 for (i
= 0; i
< tok
->argc
; i
++) {
396 (tok
->argv
[i
] - tok
->wspace
) + s
;
398 tok
->wptr
= (tok
->wptr
- tok
->wspace
) + s
;
399 tok
->wstart
= (tok
->wstart
- tok
->wspace
) + s
;
402 tok
->wmax
= s
+ size
;
/*
 * Argument vector nearly full (argc within 4 slots of amax): reallocate
 * it to hold amax pointers.
 * NOTE(review): the statement that raises amax is not visible here.
 */
404 if (tok
->argc
>= tok
->amax
- 4) {
407 p
= (char **) tok_realloc(tok
->argv
,
408 tok
->amax
* sizeof(char *));
/* Cursor not located during the scan: use the offset in the word in progress. */
415 if (cc
== -1 && co
== -1) {
417 co
= tok
->wptr
- tok
->wstart
;
/* Hand the argument vector to the caller as read-only strings. */
424 *argv
= (const char **)tok
->argv
;
430 * Simpler version of tok_line, taking a NUL-terminated line
431 * and splitting it into words, ignoring cursor state.
/*
 * tok_str: tokenize a NUL-terminated string through tok_line.
 * tok: tokenizer state. line: string to split.
 * argc, argv: passed straight through to tok_line.
 * Returns whatever tok_line returns.
 * NOTE(review): the declaration of li and the assignment of its buffer
 * field are not visible in this listing.
 */
434 tok_str(Tokenizer
*tok
, const char *line
, int *argc
, const char ***argv
)
/* Start from an all-zero line descriptor. */
438 memset(&li
, 0, sizeof(li
));
/* Cursor and end of input both sit on the terminating NUL of line. */
440 li
.cursor
= li
.lastchar
= strchr(line
, '\0');
/* NULL for both cursor outputs: the cursor position is not reported. */
441 return (tok_line(tok
, &li
, argc
, argv
, NULL
, NULL
));