Little fix after the last commit (mostly a git fail)
[eigenmath-fx.git] / scan.cpp
blob3514c87029b2038381aa09c713d5f108441b8819
1 // This scanner uses the recursive descent method.
2 //
3 // The char pointers token_str and scan_str are pointers to the input string as
4 // in the following example.
5 //
6 // | g | a | m | m | a | | a | l | p | h | a |
7 // ^ ^
8 // token_str scan_str
9 //
10 // The char pointer token_buf points to a malloc buffer.
12 // | g | a | m | m | a | \0 |
13 // ^
14 // token_buf
16 #include "stdafx.h"
17 #include "defs.h"
19 #define T_INTEGER 1001
20 #define T_DOUBLE 1002
21 #define T_SYMBOL 1003
22 #define T_FUNCTION 1004
23 #define T_NEWLINE 1006
24 #define T_STRING 1007
25 #define T_GTEQ 1008
26 #define T_LTEQ 1009
27 #define T_EQ 1010
29 static int token, newline_flag, meta_mode;
30 static char *input_str, *scan_str, *token_str, *token_buf;
32 // Returns number of chars scanned and expr on stack.
34 // Returns zero when nothing left to scan.
36 int
37 scan(char *s)
39 meta_mode = 0;
40 expanding++;
41 input_str = s;
42 scan_str = s;
43 get_next_token();
44 if (token == 0) {
45 push(symbol(NIL));
46 expanding--;
47 return 0;
49 scan_stmt();
50 expanding--;
51 return (int) (token_str - input_str);
54 int
55 scan_meta(char *s)
57 meta_mode = 1;
58 expanding++;
59 input_str = s;
60 scan_str = s;
61 get_next_token();
62 if (token == 0) {
63 push(symbol(NIL));
64 expanding--;
65 return 0;
67 scan_stmt();
68 expanding--;
69 return (int) (token_str - input_str);
72 void
73 scan_stmt(void)
75 scan_relation();
76 if (token == '=') {
77 get_next_token();
78 push_symbol(SETQ);
79 swap();
80 scan_relation();
81 list(3);
85 void
86 scan_relation(void)
88 scan_expression();
89 switch (token) {
90 case T_EQ:
91 push_symbol(TESTEQ);
92 swap();
93 get_next_token();
94 scan_expression();
95 list(3);
96 break;
97 case T_LTEQ:
98 push_symbol(TESTLE);
99 swap();
100 get_next_token();
101 scan_expression();
102 list(3);
103 break;
104 case T_GTEQ:
105 push_symbol(TESTGE);
106 swap();
107 get_next_token();
108 scan_expression();
109 list(3);
110 break;
111 case '<':
112 push_symbol(TESTLT);
113 swap();
114 get_next_token();
115 scan_expression();
116 list(3);
117 break;
118 case '>':
119 push_symbol(TESTGT);
120 swap();
121 get_next_token();
122 scan_expression();
123 list(3);
124 break;
125 default:
126 break;
130 void
131 scan_expression(void)
133 int h = tos;
134 switch (token) {
135 case '+':
136 get_next_token();
137 scan_term();
138 break;
139 case '-':
140 get_next_token();
141 scan_term();
142 negate();
143 break;
144 default:
145 scan_term();
146 break;
148 while (newline_flag == 0 && (token == '+' || token == '-')) {
149 if (token == '+') {
150 get_next_token();
151 scan_term();
152 } else {
153 get_next_token();
154 scan_term();
155 negate();
158 if (tos - h > 1) {
159 list(tos - h);
160 push_symbol(ADD);
161 swap();
162 cons();
167 is_factor(void)
169 switch (token) {
170 case '*':
171 case '/':
172 return 1;
173 case '(':
174 case T_SYMBOL:
175 case T_FUNCTION:
176 case T_INTEGER:
177 case T_DOUBLE:
178 case T_STRING:
179 if (newline_flag) { // implicit mul can't cross line
180 scan_str = token_str; // better error display
181 return 0;
182 } else
183 return 1;
184 default:
185 break;
187 return 0;
190 void
191 scan_term(void)
193 int h = tos;
195 scan_power();
197 // discard integer 1
199 if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
200 pop();
202 while (is_factor()) {
203 if (token == '*') {
204 get_next_token();
205 scan_power();
206 } else if (token == '/') {
207 get_next_token();
208 scan_power();
209 inverse();
210 } else
211 scan_power();
213 // fold constants
215 if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
216 multiply();
218 // discard integer 1
220 if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
221 pop();
224 if (h == tos)
225 push_integer(1);
226 else if (tos - h > 1) {
227 list(tos - h);
228 push_symbol(MULTIPLY);
229 swap();
230 cons();
234 void
235 scan_power(void)
237 scan_factor();
238 if (token == '^') {
239 get_next_token();
240 push_symbol(POWER);
241 swap();
242 scan_power();
243 list(3);
247 void
248 scan_factor(void)
250 int h;
252 h = tos;
254 if (token == '(')
255 scan_subexpr();
256 else if (token == T_SYMBOL)
257 scan_symbol();
258 else if (token == T_FUNCTION)
259 scan_function_call();
260 else if (token == T_INTEGER) {
261 bignum_scan_integer(token_buf);
262 get_next_token();
263 } else if (token == T_DOUBLE) {
264 bignum_scan_float(token_buf);
265 get_next_token();
266 } else if (token == T_STRING)
267 scan_string();
268 else
269 error("syntax error");
271 // index
273 if (token == '[') {
274 get_next_token();
275 push_symbol(INDEX);
276 swap();
277 scan_expression();
278 while (token == ',') {
279 get_next_token();
280 scan_expression();
282 if (token != ']')
283 error("] expected");
284 get_next_token();
285 list(tos - h);
288 while (token == '!') {
289 get_next_token();
290 push_symbol(FACTORIAL);
291 swap();
292 list(2);
296 void
297 scan_symbol(void)
299 if (token != T_SYMBOL)
300 error("symbol expected");
301 if (meta_mode && strlen(token_buf) == 1)
302 switch (token_buf[0]) {
303 case 'a':
304 push(symbol(METAA));
305 break;
306 case 'b':
307 push(symbol(METAB));
308 break;
309 case 'x':
310 push(symbol(METAX));
311 break;
312 default:
313 push(usr_symbol(token_buf));
314 break;
316 else
317 push(usr_symbol(token_buf));
318 get_next_token();
321 void
322 scan_string(void)
324 new_string(token_buf);
325 get_next_token();
328 void
329 scan_function_call(void)
331 int n = 1;
332 U *p;
333 p = usr_symbol(token_buf);
334 push(p);
335 get_next_token(); // function name
336 get_next_token(); // left paren
337 if (token != ')') {
338 scan_stmt();
339 n++;
340 while (token == ',') {
341 get_next_token();
342 scan_stmt();
343 n++;
346 if (token != ')')
347 error(") expected");
348 get_next_token();
349 list(n);
352 // scan subexpression
354 void
355 scan_subexpr(void)
357 int n;
358 if (token != '(')
359 error("( expected");
360 get_next_token();
361 scan_stmt();
362 if (token == ',') {
363 n = 1;
364 while (token == ',') {
365 get_next_token();
366 scan_stmt();
367 n++;
369 build_tensor(n);
371 if (token != ')')
372 error(") expected");
373 get_next_token();
376 void
377 error(char *errmsg)
379 printchar(' ');
381 // try not to put question mark on orphan line
383 while (input_str != scan_str) {
384 if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str)
385 break;
386 printchar(*input_str++);
389 printstr(" ? ");
391 while (*input_str && (*input_str != '\n' && *input_str != '\r'))
392 printchar(*input_str++);
394 printchar(' ');
396 stop(errmsg);
399 // There are n expressions on the stack, possibly tensors.
401 // This function assembles the stack expressions into a single tensor.
403 // For example, at the top level of the expression ((a,b),(c,d)), the vectors
404 // (a,b) and (c,d) would be on the stack.
406 void
407 build_tensor(int n)
409 // int i, j, k, ndim, nelem;
411 int i;
413 U **s;
415 save();
417 s = stack + tos - n;
419 p2 = alloc_tensor(n);
420 p2->u.tensor->ndim = 1;
421 p2->u.tensor->dim[0] = n;
422 for (i = 0; i < n; i++)
423 p2->u.tensor->elem[i] = s[i];
425 tos -= n;
427 push(p2);
429 restore();
432 void
433 get_next_token()
435 newline_flag = 0;
436 while (1) {
437 get_token();
438 if (token != T_NEWLINE)
439 break;
440 newline_flag = 1;
444 void
445 get_token(void)
447 // skip spaces
449 while (isspace(*scan_str)) {
450 if (*scan_str == '\n' || *scan_str == '\r') {
451 token = T_NEWLINE;
452 scan_str++;
453 return;
455 scan_str++;
458 token_str = scan_str;
460 // end of string?
462 if (*scan_str == 0) {
463 token = 0;
464 return;
467 // number?
469 if (isdigit(*scan_str) || *scan_str == '.') {
470 while (isdigit(*scan_str))
471 scan_str++;
472 if (*scan_str == '.') {
473 scan_str++;
474 while (isdigit(*scan_str))
475 scan_str++;
476 if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) {
477 scan_str += 2;
478 while (isdigit(*scan_str))
479 scan_str++;
481 token = T_DOUBLE;
482 } else
483 token = T_INTEGER;
484 update_token_buf(token_str, scan_str);
485 return;
488 // symbol?
490 if (isalpha(*scan_str)) {
491 while (isalnum(*scan_str))
492 scan_str++;
493 if (*scan_str == '(')
494 token = T_FUNCTION;
495 else
496 token = T_SYMBOL;
497 update_token_buf(token_str, scan_str);
498 return;
501 // string ?
503 if (*scan_str == '"') {
504 scan_str++;
505 while (*scan_str != '"') {
506 if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r')
507 error("runaway string");
508 scan_str++;
510 scan_str++;
511 token = T_STRING;
512 update_token_buf(token_str + 1, scan_str - 1);
513 return;
516 // comment?
518 if (*scan_str == '#' || (*scan_str == '-' && scan_str[1] == '-')) {
519 while (*scan_str && *scan_str != '\n' && *scan_str != '\r')
520 scan_str++;
521 if (*scan_str)
522 scan_str++;
523 token = T_NEWLINE;
524 return;
527 // relational operator?
529 if (*scan_str == '=' && scan_str[1] == '=') {
530 scan_str += 2;
531 token = T_EQ;
532 return;
535 if (*scan_str == '<' && scan_str[1] == '=') {
536 scan_str += 2;
537 token = T_LTEQ;
538 return;
541 if (*scan_str == '>' && scan_str[1] == '=') {
542 scan_str += 2;
543 token = T_GTEQ;
544 return;
547 // single char token
549 token = *scan_str++;
552 void
553 update_token_buf(char *a, char *b)
555 int n;
557 if (token_buf)
558 free(token_buf);
560 n = (int) (b - a);
562 token_buf = (char *) malloc(n + 1);
564 if (token_buf == 0)
565 stop("malloc failure");
567 strncpy(token_buf, a, n);
569 token_buf[n] = 0;
572 // Notes:
574 // Formerly add() and multiply() were used to construct expressions but
575 // this preevaluation caused problems.
577 // For example, suppose A has the floating point value inf.
579 // Before, the expression A/A resulted in 1 because the scanner would
580 // divide the symbols.
582 // After removing add() and multiply(), A/A results in nan which is the
583 // correct result.
585 // The functions negate() and inverse() are used but they do not cause
586 // problems with preevaluation of symbols.