Save variables on an unordered_map
[miniREPL.git] / tokenize.c
blobf37c32a36141129ecfc9b8b4a435cfe2505e59cd
1 /**
2 * @file tokenize.c
3 * @author Dan Noland <nolandda@nolandda.org>
4 * @date Wed Mar 7 17:10:49 2018
5 *
6 * @brief Implementation of the tokenizing tools
7 *
8 *
9 */
11 #include <string.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <assert.h>
15 #define __STDC_FORMAT_MACROS 1
16 #include <inttypes.h>
17 #include "tokenize.h"
19 // Predefined whitespace set: space, tab, CR, LF, vertical tab, form feed
20 const char* ASCII_WHITESPACE_CHARS = " \t\r\n\v\f";
22 // Forward declaration of the helper that drops empty tokens from a set
23 static void do_token_trim( tokset_t* set );
26 /**
27 * Tokenizes a string based on separators and records the result in a
28 * token set
30 * @param set The output token set
31 * @param str The string to be tokenized
32 * @param sep An array of characters to tokenize on
33 * @param trim Boolean value determining if zero length tokens (i.e. empty string "") be removed
35 * @return The number of tokens created.
37 size_t create_tokens( tokset_t* set, const char* str,
38 const char* sep, uint8_t trim ) {
39 size_t i=0, j=0;
40 size_t len = 0;
41 size_t tokcount = 0;
42 char* tstr = NULL;
43 char** tokens = NULL;
44 char* prev = NULL;
46 if( str && sep ) {
47 tokcount = 1;
48 tstr = strdup(str);
49 // Make a first pass marking seperators with '\0' and counting tokens.
50 while(tstr[i]) {
51 j = 0;
52 while(sep[j]) {
53 if(tstr[i] == sep[j]) {
54 // found a seperator
55 tokcount += 1;
56 tstr[i] = '\0';
58 j += 1;
60 i += 1;
62 // we accidentially compute strlen along the way. Minor
63 // optimization here to avoid a call.
64 len = i;
66 // Allocate the tokens
67 tokens = calloc(tokcount, sizeof(char*));
69 // Second pass to assign token pointers
70 prev = tstr;
71 j = 0;
72 for(i=0; i<len; i+=1) {
73 if( tstr[i] == '\0' ) {
74 tokens[j] = prev;
75 prev = tstr+i+1;
76 j += 1;
79 // one more left over after the loop (we never see the final '\0')
80 tokens[j] = prev;
82 set->victimstr = tstr;
83 set->numtok = tokcount;
84 set->tokens = tokens;
85 set->curidx = 0;
86 // Trim out null tokens if the user requested
87 if( trim ) {
88 do_token_trim( set );
91 return set->numtok;
94 /**
95 * Helper function for the common case where the user wishes to
96 * tokenize on the ASCII whitespace characters.
98 * @param set The output token set
99 * @param str The string to be tokenized
101 * @return The number of tokens created
103 size_t create_ws_delimited_tokens( tokset_t* set, const char* str ) {
104 return create_tokens( set, str, ASCII_WHITESPACE_CHARS, 1 );
108 /**
109 * Get a token by its index
111 * @param set The tokenset containing tokens
112 * @param idx The index of the token requested
114 * @return A constant pointer to the token. The tokenset retains
115 * ownership of this memory. If idx is beyond then number of tokens
116 * NULL is returned.
118 const char* get_token( tokset_t* set, size_t idx ) {
119 size_t tidx = idx;
120 if(idx < set->numtok) {
121 set->curidx = idx;
122 return set->tokens[tidx];
124 else {
125 return NULL;
129 /**
130 * Get the next token from the tokenset
132 * @param set The tokenset containing tokens
134 * @return A constant pointer to the token. The tokenset retains
135 * ownership of this memory. If the set is already past the final
136 * token NULL is returned.
138 const char* get_next_token( tokset_t* set ) {
139 size_t tidx = set->curidx;
140 if(set->curidx < set->numtok) {
141 set->curidx += 1;
142 return set->tokens[tidx];
144 else {
145 return NULL;
149 /**
150 * Accessor for the tokenset length.
152 * @param set The token set
154 * @return The number of tokens in the set
156 size_t get_num_tokens( const tokset_t* set ) {
157 return set->numtok;
160 /**
161 * Reset the internal counter used by get_next_token(...)
163 * @param set The token set
165 void reset_token_counter( tokset_t* set ) {
166 set->curidx = 0;
167 return;
170 /**
171 * Free all memory internal to the token set, but not the set iteslf.
173 * @param set The token set to be destroyed
175 void free_tokens( tokset_t* set ) {
176 if( set ) {
177 free(set->tokens); set->tokens = NULL;
178 free(set->victimstr); set->victimstr = NULL;
179 set->numtok = 0;
180 set->curidx = 0;
182 return;
185 /**
186 * Print the internal state of the token set including all tokens
188 * @param set The token set to be printed
190 void print_tokens( const tokset_t* set ) { // TODO: creates stdio dep. Keep?
191 size_t i = 0;
192 int tok_ok = 0;
193 int vs_ok = 0;
194 char* cur = NULL;
195 printf(":::::::::::::::::::::::::::::::::\n");
196 if( set ) {
197 printf(":: Context at %p\n", set);
198 printf(":: Num Tokens %zu\n", set->numtok);
199 if( set->tokens ) {
200 tok_ok = 1;
201 printf(":: Tokens ptr at %p\n", set->tokens);
203 else {
204 printf(":: Tokens are NULL\n");
206 if( set->victimstr ) {
207 vs_ok = 1;
208 printf(":: Victim string at %p\n", set->victimstr);
210 else {
211 printf(":: Victim string is NULL\n");
213 if( tok_ok && vs_ok ) {
214 for(i=0; i<set->numtok; i+=1) {
215 cur = set->tokens[i];
216 printf(":: Token[%zu] = %p = [%s]\n", i, cur, cur);
218 cur = set->tokens[set->curidx];
219 printf(":: Current Token is %zu = %p = [%s]\n",
220 set->curidx, cur, cur);
224 else {
225 printf(":: Context is NULL\n");
227 printf(":::::::::::::::::::::::::::::::::\n");
228 return;
232 static void do_token_trim( tokset_t* set ) {
233 size_t i=0, j=0;
234 size_t count = 0;
235 char** newtoks = NULL;
236 for(i=0; i<set->numtok; i+=1) {
237 if(set->tokens[i][0] != '\0') {
238 // Found an non-empty token
239 count+=1;
243 // reallocate and assign non-empty tokens
244 newtoks = calloc(count, sizeof(char*));
245 for(i=0; i<set->numtok; i+=1) {
246 if(set->tokens[i][0] != '\0') {
247 newtoks[j] = set->tokens[i];
248 j += 1;
252 free(set->tokens);
253 set->tokens = newtoks;
254 set->numtok = count;
255 return;