3 * @author Dan Noland <nolandda@nolandda.org>
4 * @date Wed Mar 7 17:10:49 2018
6 * @brief Implementation of the tokenizing tools
15 #define __STDC_FORMAT_MACROS 1
19 // Predefined whitespace set
20 const char* ASCII_WHITESPACE_CHARS
= " \t\r\n\v\f";
22 // Forward decl of helper function
23 static void do_token_trim( tokset_t
* set
);
27 * Tokenizes a string based on separators and records the result in a
30 * @param set The output token set
31 * @param str The string to be tokenized
32 * @param sep An array of characters to tokenize on
33 * @param trim Boolean value determining whether zero-length tokens (i.e. the empty string "") are removed
35 * @return The number of tokens created.
37 size_t create_tokens( tokset_t
* set
, const char* str
,
38 const char* sep
, uint8_t trim
) {
49 // Make a first pass marking separators with '\0' and counting tokens.
53 if(tstr
[i
] == sep
[j
]) {
62 // we accidentally compute strlen along the way. Minor
63 // optimization here to avoid a call.
66 // Allocate the tokens
67 tokens
= calloc(tokcount
, sizeof(char*));
69 // Second pass to assign token pointers
72 for(i
=0; i
<len
; i
+=1) {
73 if( tstr
[i
] == '\0' ) {
79 // one more left over after the loop (we never see the final '\0')
82 set
->victimstr
= tstr
;
83 set
->numtok
= tokcount
;
86 // Trim out null tokens if the user requested
95 * Helper function for the common case where the user wishes to
96 * tokenize on the ASCII whitespace characters.
98 * @param set The output token set
99 * @param str The string to be tokenized
101 * @return The number of tokens created
103 size_t create_ws_delimited_tokens( tokset_t
* set
, const char* str
) {
104 return create_tokens( set
, str
, ASCII_WHITESPACE_CHARS
, 1 );
109 * Get a token by its index
111 * @param set The tokenset containing tokens
112 * @param idx The index of the token requested
114 * @return A constant pointer to the token. The tokenset retains
115 * ownership of this memory. If idx is beyond the number of tokens
118 const char* get_token( tokset_t
* set
, size_t idx
) {
120 if(idx
< set
->numtok
) {
122 return set
->tokens
[tidx
];
130 * Get the next token from the tokenset
132 * @param set The tokenset containing tokens
134 * @return A constant pointer to the token. The tokenset retains
135 * ownership of this memory. If the set is already past the final
136 * token NULL is returned.
138 const char* get_next_token( tokset_t
* set
) {
139 size_t tidx
= set
->curidx
;
140 if(set
->curidx
< set
->numtok
) {
142 return set
->tokens
[tidx
];
150 * Accessor for the tokenset length.
152 * @param set The token set
154 * @return The number of tokens in the set
156 size_t get_num_tokens( const tokset_t
* set
) {
161 * Reset the internal counter used by get_next_token(...)
163 * @param set The token set
165 void reset_token_counter( tokset_t
* set
) {
171 * Free all memory internal to the token set, but not the set itself.
173 * @param set The token set to be destroyed
175 void free_tokens( tokset_t
* set
) {
177 free(set
->tokens
); set
->tokens
= NULL
;
178 free(set
->victimstr
); set
->victimstr
= NULL
;
186 * Print the internal state of the token set including all tokens
188 * @param set The token set to be printed
190 void print_tokens( const tokset_t
* set
) { // TODO: creates stdio dep. Keep?
195 printf(":::::::::::::::::::::::::::::::::\n");
197 printf(":: Context at %p\n", set
);
198 printf(":: Num Tokens %zu\n", set
->numtok
);
201 printf(":: Tokens ptr at %p\n", set
->tokens
);
204 printf(":: Tokens are NULL\n");
206 if( set
->victimstr
) {
208 printf(":: Victim string at %p\n", set
->victimstr
);
211 printf(":: Victim string is NULL\n");
213 if( tok_ok
&& vs_ok
) {
214 for(i
=0; i
<set
->numtok
; i
+=1) {
215 cur
= set
->tokens
[i
];
216 printf(":: Token[%zu] = %p = [%s]\n", i
, cur
, cur
);
218 cur
= set
->tokens
[set
->curidx
];
219 printf(":: Current Token is %zu = %p = [%s]\n",
220 set
->curidx
, cur
, cur
);
225 printf(":: Context is NULL\n");
227 printf(":::::::::::::::::::::::::::::::::\n");
232 static void do_token_trim( tokset_t
* set
) {
235 char** newtoks
= NULL
;
236 for(i
=0; i
<set
->numtok
; i
+=1) {
237 if(set
->tokens
[i
][0] != '\0') {
238 // Found a non-empty token
243 // reallocate and assign non-empty tokens
244 newtoks
= calloc(count
, sizeof(char*));
245 for(i
=0; i
<set
->numtok
; i
+=1) {
246 if(set
->tokens
[i
][0] != '\0') {
247 newtoks
[j
] = set
->tokens
[i
];
253 set
->tokens
= newtoks
;