tying to deal with a flex problem
[proto.git] / src / compiler / reader.cpp
blobeeefc6979af3e9ade7bae483663ec3fe16f1e571
1 /* Reader finds text and turns it into s-expressions
2 Copyright (C) 2005-2008, Jonathan Bachrach, Jacob Beal, and contributors
3 listed in the AUTHORS file in the MIT Proto distribution's top directory.
5 This file is part of MIT Proto, and is distributed under the terms of
6 the GNU General Public License, with a linking exception, as described
7 in the file LICENSE in the MIT Proto distribution's top directory. */
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include "reader.h"
15 #include <iostream>
17 void Path::parse_path (string path, list<string> *dirs) {
18 size_t end = 0;
19 list<string> news;
20 for(size_t i=0; ; i=end+1) {
21 end = path.find(':',i);
22 string dir = path.substr(i,end-i);
23 size_t proto_sub = dir.find("$PROTO");
24 if(proto_sub!=string::npos) { dir = proto_dir+dir.substr(proto_sub+6); }
25 if(dir[dir.size()-1]!='/') dir+='/';
26 //cout << "Path contains: '" << dir << "'" << endl;;
27 news.push_back(dir);
28 if(end==string::npos) break;
30 dirs->splice(dirs->begin(),news);
31 //cout << "Path is:" << endl;
32 //for(list<string>::iterator i=dirs->begin(); i!=dirs->end(); i++)
33 // cout << "'" << *i << "'" << endl;
36 void print_token (Token *token) {
37 switch (token->type) {
38 case Token_left_paren: post("LP"); break;
39 case Token_right_paren: post("RP"); break;
40 case Token_symbol: post("N(%s)", (char*)token->name); break;
41 case Token_string: post("S(%s)", (char*)token->name); break;
42 case Token_eof: post("EF"); break;
46 Token *new_token (Token_type type, char *name, int max_size) {
47 Token *tok = (Token*)MALLOC(sizeof(Token));
48 char *buf = (char*)MALLOC(strlen(name)+1);
49 if (strlen(name) > max_size)
50 uerror("NEW TOKEN NAME OVERFLOW %d\n", max_size);
51 strcpy(buf, name);
52 tok->type = type;
53 tok->name = buf;
54 // print_token(tok); debug("\n");
55 return tok;
58 #define BUF_SIZE 1024
60 #define OSTR_CHR '|'
61 #define STR_CHR '\"'
62 #define RSTR_CHR '\''
63 #define Q_CHR '\''
64 #define H_CHR '#'
65 #define C_CHR '\''
67 Token *read_token (char *string, int *start) {
68 int is_str = 0;
69 int i = *start;
70 int j = 0;
71 int len = strlen(string);
72 int is_raw_str = 0;
73 int is_ostr = 0;
74 int is_hash = 0;
75 char c;
76 char buf[BUF_SIZE];
77 // post("READING TOKEN %s %d\n", string, *start);
78 for (;;) {
79 if (j >= BUF_SIZE)
80 uerror("BUF OVERFLOW\n");
81 if (i < len) {
82 c = string[i++]; *start = i;
83 switch (c) {
84 case ' ': case '\t': case '\n': case '\r': break;
85 case '(': return new_token(Token_left_paren, "(", 1);
86 case ')': return new_token(Token_right_paren, ")", 1);
87 case H_CHR:
88 if (i < len) {
89 c = string[i++]; *start = i;
90 if (c == C_CHR)
91 return new_token(Token_char, "#/", 2);
92 else if (c == 'T' || c == 't')
93 return new_token(Token_true, "#T", 2);
94 else if (c == 'F' || c == 'f')
95 return new_token(Token_false, "#F", 2);
96 else
97 uerror("BAD CHAR TOKEN %s %d\n", string, i);
98 } else
99 uerror("BAD CHAR TOKEN %s %d\n", string, i);
100 case OSTR_CHR:
101 is_ostr = 1;
102 case STR_CHR:
103 is_str = 1; goto ready;
104 // case RSTR_CHR: is_raw_str = 1; is_str = 1; goto ready;
105 case Q_CHR: return new_token(Token_quote, "\'", 1);
106 case ';': while (i < len) {
107 c = string[i++]; *start = i;
108 if (c == '\n' || c == '\r') break;
110 return read_token(string, start);
111 default: buf[j++] = c; buf[j] = 0; goto ready;
113 } else
114 return new_token(Token_eof, "", 1);
116 ready:
117 if (is_str) {
118 int is_esc = 0;
119 for (; i < len && j < BUF_SIZE; ) {
120 c = string[i++];
121 *start = i;
122 if (!is_raw_str && c == '\\') {
123 is_esc = 1;
124 continue;
126 if ((!is_esc && !is_raw_str &&
127 ((!is_ostr && c == STR_CHR) || (is_ostr && c == OSTR_CHR))) ||
128 (is_raw_str && c == RSTR_CHR)) {
129 return new_token(Token_string, buf, BUF_SIZE);
131 buf[j++] = (is_esc && c == 'n') ? '\n' : c;
132 buf[j] = 0;
133 is_esc = 0;
135 uerror("unable to find end of string %s\n", buf);
136 } else {
137 for (; i < len; ) {
138 c = string[i++];
139 *start = i;
140 if (c == ')' || c == '(' || c == ' ' || c == '\t' || c == '\n' || c == '\r') {
141 if (c == ')' || c == '(')
142 *start -= 1;
143 return new_token(Token_symbol, buf, BUF_SIZE);
145 buf[j++] = c;
146 buf[j] = 0;
148 *start = i;
149 return new_token(Token_symbol, buf, BUF_SIZE);
153 extern Obj *read_from (Token *token, char *string, int *start);
155 List *read_list (char *string, int *start) {
156 List *_list = lisp_nil;
157 // debug("READING LIST %d\n", *start);
158 for (;;) {
159 Token *token = read_token(string, start);
160 Obj *expr;
161 // print_token(token); debug(" READ LIST TOKEN %d\n", *start);
162 switch (token->type) {
163 case Token_right_paren:
164 case Token_eof:
165 // debug("DONE READING LIST\n");
166 return lst_rev(_list);
168 expr = read_from(token, string, start);
169 // post("PAIRING "); print_object(list); post("\n");
170 _list = _pair(expr, (Obj*)_list);
174 int isnum (char *name) {
175 int i, nump;
176 nump = isdigit(name[0]) || (name[0] == '-' && strlen(name) > 1);
177 for (i = 1; i < strlen(name); i++) {
178 nump = nump && (isdigit(name[i]) || name[i] == '.');
180 return nump;
183 Obj *new_sym_or_num(char *name) {
184 if (isnum(name)) {
185 int inum; flo fnum;
186 int res = sscanf(name, "%f", &fnum);
187 if (res == 1) {
188 return (Obj*)new_num(fnum);
189 } else {
190 res = sscanf(name, "%d", &inum);
191 if (res == 1) {
192 return (Obj*)new_num((flo)inum);
193 } else
194 uerror("UNABLE TO PARSE NUM %s", name);
196 } else
197 return (Obj*)new_sym(name);
200 Obj *read_from (Token *token, char *string, int *start) {
201 // post("READING FROM %s\n", &string[*start]);
202 switch (token->type) {
203 case Token_quote:
204 return (Obj*)_list((Obj*)new_sym("QUOTE"), read_object(string, start), NULL);
205 case Token_char: {
206 Obj *obj = read_object(string, start);
207 if (obj->_class == num_class)
208 return (Obj*)new_num('0' + num_data(obj));
209 else if (obj->_class == sym_class)
210 return (Obj*)new_num(sym_name(obj)[0]);
211 else
212 uerror("BAD CHAR TOKEN\n");
214 case Token_string: return (Obj*)new_str(token->name);
215 case Token_true: return (Obj*)new_num(1);
216 case Token_false: return (Obj*)new_num(0);
217 case Token_symbol: return (Obj*)new_sym_or_num(token->name);
218 case Token_left_paren: return (Obj*)read_list(string, start);
219 case Token_right_paren: uerror("Unbalanced parens\n");
220 case Token_eof: return NULL;
221 default: uerror("Unknown token type %d\n", token->type);
225 Obj *read_object (char *string, int *start) {
226 Token *token = read_token(string, start);
227 return read_from(token, string, start);
230 #define FILE_BUF_SIZE 100000
232 int copy_from_file (ifstream *file, char *buf) {
233 int i=0;
234 while(file->good() && i<FILE_BUF_SIZE-1) buf[i++] = file->get();
235 if(i==FILE_BUF_SIZE-1) uerror("FILE READING BUFFER OVERFLOW %d\n", i);
236 buf[i-1] = 0; // minus 1 because last character came from EOF
237 return 1;
240 List *read_objects_from (ifstream *file) {
241 int start = 0;
242 List *objs = lisp_nil;
243 char buf[FILE_BUF_SIZE];
245 if (!copy_from_file(file, buf))
246 return NULL;
247 for (;;) {
248 Obj *obj = read_object(buf, &start);
249 if (obj == NULL)
250 return lst_rev(objs);
251 else
252 objs = _pair(obj, (Obj*)objs);
256 List *read_objects_from_dirs (string filename, Path *path) {
257 ifstream* file = path->find_in_path(filename);
258 if(file==NULL) { return NULL; }
259 List *res = read_objects_from(file);
260 delete file; return res;
263 List *qq_env (char *str, void *val, ...) {
264 int i, n;
265 va_list ap;
266 List *res = PAIR(PAIR(new_sym(str), PAIR(val, lisp_nil)), lisp_nil);
267 va_start(ap, val);
268 for (n = 1; ; n++) {
269 char *s = va_arg(ap, char *);
270 if (s == NULL) break;
271 Obj *v = va_arg(ap, Obj *);
272 if (v == NULL) break;
273 res = PAIR(PAIR(new_sym(s), PAIR(v, lisp_nil)), res);
275 va_end(ap);
276 return lst_rev(res);
279 Obj *read_from_str (char *str) {
280 int j = 0;
281 Obj *obj = read_object(str, &j);
282 return obj;
285 Obj *qq_lookup(char *name, List *env) {
286 int i;
287 List *args = lisp_nil;
288 for (i = 0; i < lst_len(env); i++) {
289 List *binding = (List*)lst_elt(env, i);
290 if (strcmp(name, sym_name(lst_elt(binding, 0))) == 0)
291 return lst_elt(binding, 1);
293 uerror("Unable to find qq_binding %s", name);
296 Obj *copy_eval_quasi_quote(Obj *obj, List *env) {
297 if (obj->_class == sym_class) {
298 if (sym_name(obj)[0] == '$') {
299 return qq_lookup(sym_name(obj), env);
300 } else
301 return obj;
302 } else if (obj->_class == num_class) {
303 return obj;
304 } else if (obj->_class == lst_class) {
305 int i;
306 int is_dot = 0;
307 List *args = lisp_nil;
308 for (i = 0; i < lst_len((List*)obj); i++) {
309 Obj *copy = copy_eval_quasi_quote(lst_elt((List*)obj, i), env);
310 if (is_dot) {
311 args = lst_rev(args);
312 List *a = args;
313 while (lst_tail(a) != lisp_nil)
314 a = (List*)a->tail;
315 a->tail = copy;
316 return (Obj*)args;
317 } else if (copy->_class == sym_class && strcmp(sym_name(copy), ".") == 0)
318 is_dot = 1;
319 else
320 args = PAIR(copy, args);
322 return (Obj*)lst_rev(args);
323 } else
324 uerror("Unknown quasi quote element %s", obj->_class->name);
327 Obj *read_qq (char *str, List *env) {
328 Obj *obj = read_from_str(str);
329 return copy_eval_quasi_quote(obj, env);