1 /* Reader finds text and turns it into s-expressions
2 Copyright (C) 2005-2008, Jonathan Bachrach, Jacob Beal, and contributors
3 listed in the AUTHORS file in the MIT Proto distribution's top directory.
5 This file is part of MIT Proto, and is distributed under the terms of
6 the GNU General Public License, with a linking exception, as described
7 in the file LICENSE in the MIT Proto distribution's top directory. */
17 void Path::parse_path (string path
, list
<string
> *dirs
) {
20 for(size_t i
=0; ; i
=end
+1) {
21 end
= path
.find(':',i
);
22 string dir
= path
.substr(i
,end
-i
);
23 size_t proto_sub
= dir
.find("$PROTO");
24 if(proto_sub
!=string::npos
) { dir
= proto_dir
+dir
.substr(proto_sub
+6); }
25 if(dir
[dir
.size()-1]!='/') dir
+='/';
26 //cout << "Path contains: '" << dir << "'" << endl;;
28 if(end
==string::npos
) break;
30 dirs
->splice(dirs
->begin(),news
);
31 //cout << "Path is:" << endl;
32 //for(list<string>::iterator i=dirs->begin(); i!=dirs->end(); i++)
33 // cout << "'" << *i << "'" << endl;
36 void print_token (Token
*token
) {
37 switch (token
->type
) {
38 case Token_left_paren
: post("LP"); break;
39 case Token_right_paren
: post("RP"); break;
40 case Token_symbol
: post("N(%s)", (char*)token
->name
); break;
41 case Token_string
: post("S(%s)", (char*)token
->name
); break;
42 case Token_eof
: post("EF"); break;
46 Token
*new_token (Token_type type
, char *name
, int max_size
) {
47 Token
*tok
= (Token
*)MALLOC(sizeof(Token
));
48 char *buf
= (char*)MALLOC(strlen(name
)+1);
49 if (strlen(name
) > max_size
)
50 uerror("NEW TOKEN NAME OVERFLOW %d\n", max_size
);
54 // print_token(tok); debug("\n");
67 Token
*read_token (char *string
, int *start
) {
71 int len
= strlen(string
);
77 // post("READING TOKEN %s %d\n", string, *start);
80 uerror("BUF OVERFLOW\n");
82 c
= string
[i
++]; *start
= i
;
84 case ' ': case '\t': case '\n': case '\r': break;
85 case '(': return new_token(Token_left_paren
, "(", 1);
86 case ')': return new_token(Token_right_paren
, ")", 1);
89 c
= string
[i
++]; *start
= i
;
91 return new_token(Token_char
, "#/", 2);
92 else if (c
== 'T' || c
== 't')
93 return new_token(Token_true
, "#T", 2);
94 else if (c
== 'F' || c
== 'f')
95 return new_token(Token_false
, "#F", 2);
97 uerror("BAD CHAR TOKEN %s %d\n", string
, i
);
99 uerror("BAD CHAR TOKEN %s %d\n", string
, i
);
103 is_str
= 1; goto ready
;
104 // case RSTR_CHR: is_raw_str = 1; is_str = 1; goto ready;
105 case Q_CHR
: return new_token(Token_quote
, "\'", 1);
106 case ';': while (i
< len
) {
107 c
= string
[i
++]; *start
= i
;
108 if (c
== '\n' || c
== '\r') break;
110 return read_token(string
, start
);
111 default: buf
[j
++] = c
; buf
[j
] = 0; goto ready
;
114 return new_token(Token_eof
, "", 1);
119 for (; i
< len
&& j
< BUF_SIZE
; ) {
122 if (!is_raw_str
&& c
== '\\') {
126 if ((!is_esc
&& !is_raw_str
&&
127 ((!is_ostr
&& c
== STR_CHR
) || (is_ostr
&& c
== OSTR_CHR
))) ||
128 (is_raw_str
&& c
== RSTR_CHR
)) {
129 return new_token(Token_string
, buf
, BUF_SIZE
);
131 buf
[j
++] = (is_esc
&& c
== 'n') ? '\n' : c
;
135 uerror("unable to find end of string %s\n", buf
);
140 if (c
== ')' || c
== '(' || c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r') {
141 if (c
== ')' || c
== '(')
143 return new_token(Token_symbol
, buf
, BUF_SIZE
);
149 return new_token(Token_symbol
, buf
, BUF_SIZE
);
153 extern Obj
*read_from (Token
*token
, char *string
, int *start
);
155 List
*read_list (char *string
, int *start
) {
156 List
*_list
= lisp_nil
;
157 // debug("READING LIST %d\n", *start);
159 Token
*token
= read_token(string
, start
);
161 // print_token(token); debug(" READ LIST TOKEN %d\n", *start);
162 switch (token
->type
) {
163 case Token_right_paren
:
165 // debug("DONE READING LIST\n");
166 return lst_rev(_list
);
168 expr
= read_from(token
, string
, start
);
169 // post("PAIRING "); print_object(list); post("\n");
170 _list
= _pair(expr
, (Obj
*)_list
);
174 int isnum (char *name
) {
176 nump
= isdigit(name
[0]) || (name
[0] == '-' && strlen(name
) > 1);
177 for (i
= 1; i
< strlen(name
); i
++) {
178 nump
= nump
&& (isdigit(name
[i
]) || name
[i
] == '.');
183 Obj
*new_sym_or_num(char *name
) {
186 int res
= sscanf(name
, "%f", &fnum
);
188 return (Obj
*)new_num(fnum
);
190 res
= sscanf(name
, "%d", &inum
);
192 return (Obj
*)new_num((flo
)inum
);
194 uerror("UNABLE TO PARSE NUM %s", name
);
197 return (Obj
*)new_sym(name
);
200 Obj
*read_from (Token
*token
, char *string
, int *start
) {
201 // post("READING FROM %s\n", &string[*start]);
202 switch (token
->type
) {
204 return (Obj
*)_list((Obj
*)new_sym("QUOTE"), read_object(string
, start
), NULL
);
206 Obj
*obj
= read_object(string
, start
);
207 if (obj
->_class
== num_class
)
208 return (Obj
*)new_num('0' + num_data(obj
));
209 else if (obj
->_class
== sym_class
)
210 return (Obj
*)new_num(sym_name(obj
)[0]);
212 uerror("BAD CHAR TOKEN\n");
214 case Token_string
: return (Obj
*)new_str(token
->name
);
215 case Token_true
: return (Obj
*)new_num(1);
216 case Token_false
: return (Obj
*)new_num(0);
217 case Token_symbol
: return (Obj
*)new_sym_or_num(token
->name
);
218 case Token_left_paren
: return (Obj
*)read_list(string
, start
);
219 case Token_right_paren
: uerror("Unbalanced parens\n");
220 case Token_eof
: return NULL
;
221 default: uerror("Unknown token type %d\n", token
->type
);
225 Obj
*read_object (char *string
, int *start
) {
226 Token
*token
= read_token(string
, start
);
227 return read_from(token
, string
, start
);
230 #define FILE_BUF_SIZE 100000
232 int copy_from_file (ifstream
*file
, char *buf
) {
234 while(file
->good() && i
<FILE_BUF_SIZE
-1) buf
[i
++] = file
->get();
235 if(i
==FILE_BUF_SIZE
-1) uerror("FILE READING BUFFER OVERFLOW %d\n", i
);
236 buf
[i
-1] = 0; // minus 1 because last character came from EOF
240 List
*read_objects_from (ifstream
*file
) {
242 List
*objs
= lisp_nil
;
243 char buf
[FILE_BUF_SIZE
];
245 if (!copy_from_file(file
, buf
))
248 Obj
*obj
= read_object(buf
, &start
);
250 return lst_rev(objs
);
252 objs
= _pair(obj
, (Obj
*)objs
);
256 List
*read_objects_from_dirs (string filename
, Path
*path
) {
257 ifstream
* file
= path
->find_in_path(filename
);
258 if(file
==NULL
) { return NULL
; }
259 List
*res
= read_objects_from(file
);
260 delete file
; return res
;
263 List
*qq_env (char *str
, void *val
, ...) {
266 List
*res
= PAIR(PAIR(new_sym(str
), PAIR(val
, lisp_nil
)), lisp_nil
);
269 char *s
= va_arg(ap
, char *);
270 if (s
== NULL
) break;
271 Obj
*v
= va_arg(ap
, Obj
*);
272 if (v
== NULL
) break;
273 res
= PAIR(PAIR(new_sym(s
), PAIR(v
, lisp_nil
)), res
);
279 Obj
*read_from_str (char *str
) {
281 Obj
*obj
= read_object(str
, &j
);
285 Obj
*qq_lookup(char *name
, List
*env
) {
287 List
*args
= lisp_nil
;
288 for (i
= 0; i
< lst_len(env
); i
++) {
289 List
*binding
= (List
*)lst_elt(env
, i
);
290 if (strcmp(name
, sym_name(lst_elt(binding
, 0))) == 0)
291 return lst_elt(binding
, 1);
293 uerror("Unable to find qq_binding %s", name
);
296 Obj
*copy_eval_quasi_quote(Obj
*obj
, List
*env
) {
297 if (obj
->_class
== sym_class
) {
298 if (sym_name(obj
)[0] == '$') {
299 return qq_lookup(sym_name(obj
), env
);
302 } else if (obj
->_class
== num_class
) {
304 } else if (obj
->_class
== lst_class
) {
307 List
*args
= lisp_nil
;
308 for (i
= 0; i
< lst_len((List
*)obj
); i
++) {
309 Obj
*copy
= copy_eval_quasi_quote(lst_elt((List
*)obj
, i
), env
);
311 args
= lst_rev(args
);
313 while (lst_tail(a
) != lisp_nil
)
317 } else if (copy
->_class
== sym_class
&& strcmp(sym_name(copy
), ".") == 0)
320 args
= PAIR(copy
, args
);
322 return (Obj
*)lst_rev(args
);
324 uerror("Unknown quasi quote element %s", obj
->_class
->name
);
327 Obj
*read_qq (char *str
, List
*env
) {
328 Obj
*obj
= read_from_str(str
);
329 return copy_eval_quasi_quote(obj
, env
);