Some tweaks to Lua docs
[lsnes.git] / src / library / mathexpr.cpp
blob6c41cc50ad68925f77562c5f5cf5332586ea3c69
1 #include "mathexpr.hpp"
2 #include "string.hpp"
3 #include <set>
4 #include <map>
6 mathexpr_operinfo::mathexpr_operinfo(std::string funcname)
7 : fnname(funcname), is_operator(false), operands(0), precedence(0), rtl(false)
10 mathexpr_operinfo::mathexpr_operinfo(std::string opername, unsigned _operands, int _percedence, bool _rtl)
11 : fnname(opername), is_operator(true), operands(_operands), precedence(_percedence), rtl(_rtl)
14 mathexpr_operinfo::~mathexpr_operinfo()
18 mathexpr_typeinfo::~mathexpr_typeinfo()
22 mathexpr::mathexpr(mathexpr_typeinfo* _type)
23 : type(*_type)
25 owns_operator = false;
26 state = UNDEFINED;
27 value = NULL;
28 fn = (mathexpr_operinfo*)0xDEADBEEF;
31 mathexpr::mathexpr(mathexpr_typeinfo* _type, gcroot_pointer<mathexpr> fwd)
32 : type(*_type)
34 owns_operator = false;
35 state = FORWARD;
36 value = type.allocate();
37 arguments.push_back(&*fwd);
38 fn = NULL;
41 mathexpr::mathexpr(mathexpr_value _value)
42 : type(*_value.type)
44 owns_operator = false;
45 state = FIXED;
46 value = type.copy_allocate(_value.value);
47 fn = NULL;
50 mathexpr::mathexpr(mathexpr_typeinfo* _type, const std::string& _value, bool string)
51 : type(*_type)
53 owns_operator = false;
54 state = FIXED;
55 value = type.parse(_value, string);
56 fn = NULL;
59 mathexpr::mathexpr(mathexpr_typeinfo* _type, mathexpr_operinfo* _fn, std::vector<gcroot_pointer<mathexpr>> _args,
60 bool _owns_operator)
61 : type(*_type), fn(_fn), owns_operator(_owns_operator)
63 try {
64 for(auto& i : _args)
65 arguments.push_back(&*i);
66 value = type.allocate();
67 state = TO_BE_EVALUATED;
68 } catch(...) {
69 if(owns_operator)
70 delete fn;
71 throw;
75 mathexpr::~mathexpr()
77 if(owns_operator && fn)
78 delete fn;
79 type.deallocate(value);
82 void mathexpr::reset()
84 if(state == TO_BE_EVALUATED || state == FIXED || state == UNDEFINED || state == FORWARD)
85 return;
86 if(state == FORWARD_EVALD || state == FORWARD_EVALING) {
87 state = FORWARD;
88 return;
90 state = TO_BE_EVALUATED;
91 for(auto i : arguments)
92 i->reset();
95 mathexpr::mathexpr(const mathexpr& m)
96 : state(m.state), type(m.type), fn(m.fn), error(m.error), arguments(m.arguments)
98 value = m.value ? type.copy_allocate(m.value) : NULL;
99 if(state == EVALUATING) state = TO_BE_EVALUATED;
102 mathexpr& mathexpr::operator=(const mathexpr& m)
104 if(this == &m)
105 return *this;
106 std::string _error = m.error;
107 std::vector<mathexpr*> _arguments = m.arguments;
108 if(m.value) {
109 if(!value)
110 value = m.type.copy_allocate(m.value);
111 else
112 m.type.copy(value, m.value);
113 } else if(value) {
114 m.type.deallocate(value);
115 value = NULL;
116 } else
117 value = NULL;
118 type = m.type;
119 fn = m.fn;
120 state = m.state;
121 owns_operator = m.owns_operator;
122 m.owns_operator = false;
123 std::swap(arguments, _arguments);
124 std::swap(error, _error);
125 return *this;
128 mathexpr_value mathexpr::evaluate()
130 mathexpr_value ret;
131 ret.type = &type;
132 switch(state) {
133 case TO_BE_EVALUATED:
134 //Need to evaluate.
135 try {
136 for(auto i : arguments) {
137 if(&i->type != &type) {
138 throw mathexpr_error(mathexpr_error::TYPE_MISMATCH,
139 "Types for function mismatch");
142 state = EVALUATING;
143 std::vector<std::function<mathexpr_value()>> promises;
144 for(auto i : arguments) {
145 mathexpr* m = i;
146 promises.push_back([m]() { return m->evaluate(); });
148 mathexpr_value tmp;
149 tmp.type = &type;
150 tmp.value = value;
151 fn->evaluate(tmp, promises);
152 state = EVALUATED;
153 } catch(mathexpr_error& e) {
154 state = FAILED;
155 errcode = e.get_code();
156 error = e.what();
157 throw;
158 } catch(std::exception& e) {
159 state = FAILED;
160 errcode = mathexpr_error::UNKNOWN;
161 error = e.what();
162 throw;
163 } catch(...) {
164 state = FAILED;
165 errcode = mathexpr_error::UNKNOWN;
166 error = "Unknown error";
167 throw;
169 ret.value = value;
170 return ret;
171 case EVALUATING:
172 case FORWARD_EVALING:
173 //Circular dependency.
174 mark_error_and_throw(mathexpr_error::CIRCULAR, "Circular dependency");
175 case EVALUATED:
176 case FIXED:
177 case FORWARD_EVALD:
178 ret.value = value;
179 return ret;
180 case UNDEFINED:
181 throw mathexpr_error(mathexpr_error::UNDEFINED, "Undefined variable");
182 case FAILED:
183 throw mathexpr_error(errcode, error);
184 case FORWARD:
185 try {
186 state = FORWARD_EVALING;
187 mathexpr_value v = arguments[0]->evaluate();
188 type.copy(value, v.value);
189 state = FORWARD_EVALD;
190 return v;
191 } catch(...) {
192 state = FORWARD;
193 throw;
196 throw mathexpr_error(mathexpr_error::INTERNAL, "Internal error (shouldn't be here)");
199 void mathexpr::trace()
201 for(auto i : arguments)
202 i->mark();
205 void mathexpr::mark_error_and_throw(mathexpr_error::errorcode _errcode, const std::string& _error)
207 if(state == EVALUATING) {
208 state = FAILED;
209 errcode = _errcode;
210 error = _error;
212 if(state == FORWARD_EVALING) {
213 state = FORWARD;
215 throw mathexpr_error(_errcode, _error);
218 namespace
221 X_EXPR -> VALUE
222 X_EXPR -> STRING
223 X_EXPR -> NONARY-OP
224 X_EXPR -> FUNCTION X_ARGS
225 X_EXPR -> UNARY-OP X_EXPR
226 X_EXPR -> X_LAMBDA X_EXPR
227 X_LAMBDA -> X_EXPR BINARY-OP
228 X_ARGS -> OPEN-PAREN CLOSE_PAREN
229 X_ARGS -> OPEN-PAREN X_TAIL
230 X_TAIL -> X_PAIR X_TAIL
231 X_TAIL -> X_EXPR CLOSE-PAREN
232 X_PAIR -> X_EXPR COMMA
235 //SUBEXPRESSION -> VALUE
236 //SUBEXPRESSION -> STRING
237 //SUBEXPRESSION -> FUNCTION OPEN-PAREN CLOSE-PAREN
238 //SUBEXPRESSION -> FUNCTION OPEN-PAREN (SUBEXPRESSION COMMA)* SUBEXPRESSION CLOSE-PAREN
239 //SUBEXPRESSION -> OPEN-PAREN SUBEXPRESSION CLOSE-PAREN
240 //SUBEXPRESSION -> SUBEXPRESSION BINARY-OP SUBEXPRESSION
241 //SUBEXPRESSION -> UNARY-OP SUBEXPRESSION
242 //SUBEXPRESSION -> NONARY-OP
244 bool is_alphanumeric(char ch)
246 if(ch >= '0' && ch <= '9') return true;
247 if(ch >= 'a' && ch <= 'z') return true;
248 if(ch >= 'A' && ch <= 'Z') return true;
249 if(ch == '_') return true;
250 if(ch == '.') return true;
251 return false;
254 enum token_kind
256 TT_OPEN_PAREN,
257 TT_CLOSE_PAREN,
258 TT_COMMA,
259 TT_FUNCTION,
260 TT_OPERATOR,
261 TT_VARIABLE,
262 TT_VALUE,
263 TT_STRING,
266 struct operations_set
268 operations_set(std::set<mathexpr_operinfo*>& ops)
269 : operations(ops)
272 mathexpr_operinfo* find_function(const std::string& name)
274 mathexpr_operinfo* fn = NULL;
275 for(auto j : operations) {
276 if(name == j->fnname && !j->is_operator)
277 fn = j;
279 if(!fn) throw std::runtime_error("No such function '" + name + "'");
280 return fn;
282 mathexpr_operinfo* find_operator(const std::string& name, unsigned arity)
284 for(auto j : operations) {
285 if(name == j->fnname && j->is_operator && j->operands == arity)
286 return j;
288 return NULL;
290 private:
291 std::set<mathexpr_operinfo*>& operations;
294 struct subexpression
296 subexpression(token_kind k) : kind(k) {}
297 subexpression(token_kind k, const std::string& str) : kind(k), string(str) {}
298 token_kind kind;
299 std::string string;
302 size_t find_last_in_sub(std::vector<subexpression>& ss, size_t first)
304 size_t depth;
305 switch(ss[first].kind) {
306 case TT_FUNCTION:
307 if(first + 1 == ss.size() || ss[first + 1].kind != TT_OPEN_PAREN)
308 throw std::runtime_error("Function requires argument list");
309 first++;
310 case TT_OPEN_PAREN:
311 depth = 0;
312 while(first < ss.size()) {
313 if(ss[first].kind == TT_OPEN_PAREN)
314 depth++;
315 if(ss[first].kind == TT_CLOSE_PAREN)
316 if(!--depth) break;
317 first++;
319 if(first == ss.size())
320 throw std::runtime_error("Unmatched '('");
321 return first;
322 case TT_CLOSE_PAREN:
323 throw std::runtime_error("Unmatched ')'");
324 case TT_COMMA:
325 throw std::runtime_error("',' only allowed in function arguments");
326 case TT_VALUE:
327 case TT_STRING:
328 case TT_OPERATOR:
329 case TT_VARIABLE:
330 return first;
332 throw std::runtime_error("Internal error (shouldn't be here)");
335 size_t find_end_of_arg(std::vector<subexpression>& ss, size_t first)
337 size_t depth = 0;
338 while(first < ss.size()) {
339 if(depth == 0 && ss[first].kind == TT_COMMA)
340 return first;
341 if(ss[first].kind == TT_OPEN_PAREN)
342 depth++;
343 if(ss[first].kind == TT_CLOSE_PAREN) {
344 if(depth == 0)
345 return first;
346 depth--;
348 first++;
350 return ss.size();
353 struct expr_or_op
355 expr_or_op(gcroot_pointer<mathexpr> e) : expr(e), typei(NULL) {}
356 expr_or_op(std::string o) : op(o), typei(NULL) {}
357 gcroot_pointer<mathexpr> expr;
358 std::string op;
359 mathexpr_operinfo* typei;
362 gcroot_pointer<mathexpr> parse_rec(mathexpr_typeinfo& _type, std::vector<expr_or_op>& operands,
363 size_t first, size_t last)
365 if(operands.empty())
366 return gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type);
367 if(last - first > 1) {
368 //Find the highest percedence operator.
369 size_t best = last;
370 for(size_t i = first; i < last; i++) {
371 if(operands[i].typei) {
372 if(best == last)
373 best = i;
374 else if(operands[i].typei->precedence < operands[best].typei->precedence) {
375 best = i;
376 } else if(!operands[best].typei->rtl &&
377 operands[i].typei->precedence == operands[best].typei->precedence) {
378 best = i;
382 if(best == last) throw std::runtime_error("Internal error: No operands?");
383 if(operands[best].typei->operands == 1) {
384 //The operator is unary, collect up all following unary operators.
385 size_t j = first;
386 while(operands[j].typei)
387 j++;
388 std::vector<gcroot_pointer<mathexpr>> args;
389 args.push_back(parse_rec(_type, operands, first + 1, j + 1));
390 return gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type,
391 operands[best].typei, args);
392 } else {
393 //Binary operator.
394 std::vector<gcroot_pointer<mathexpr>> args;
395 args.push_back(parse_rec(_type, operands, first, best));
396 args.push_back(parse_rec(_type, operands, best + 1, last));
397 return gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type,
398 operands[best].typei, args);
401 return operands[first].expr;
404 gcroot_pointer<mathexpr> parse_rec(mathexpr_typeinfo& _type, std::vector<subexpression>& ss,
405 std::set<mathexpr_operinfo*>& operations,
406 std::function<gcroot_pointer<mathexpr>(const std::string&)> vars, size_t first, size_t last)
408 operations_set opset(operations);
409 std::vector<expr_or_op> operands;
410 std::vector<gcroot_pointer<mathexpr>> args;
411 mathexpr_operinfo* fn;
412 for(size_t i = first; i < last; i++) {
413 size_t l = find_last_in_sub(ss, i);
414 if(l >= last) throw std::runtime_error("Internal error: Improper nesting");
415 switch(ss[i].kind) {
416 case TT_OPEN_PAREN:
417 operands.push_back(parse_rec(_type, ss, operations, vars, i + 1, l));
418 break;
419 case TT_VALUE:
420 operands.push_back(gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type,
421 ss[i].string, false));
422 break;
423 case TT_STRING:
424 operands.push_back(gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type,
425 ss[i].string, true));
426 break;
427 case TT_VARIABLE:
428 //We have to warp this is identify transform to make the evaluation lazy.
429 operands.push_back(gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type,
430 vars(ss[i].string)));
431 break;
432 case TT_FUNCTION:
433 fn = opset.find_function(ss[i].string);
434 i += 2;
435 while(ss[i].kind != TT_CLOSE_PAREN) {
436 size_t k = find_end_of_arg(ss, i);
437 args.push_back(parse_rec(_type, ss, operations, vars, i, k));
438 if(k < ss.size() && ss[k].kind == TT_COMMA)
439 i = k + 1;
440 else
441 i = k;
443 operands.push_back(gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type, fn,
444 args));
445 args.clear();
446 break;
447 case TT_OPERATOR:
448 operands.push_back(ss[i].string);
449 break;
450 case TT_CLOSE_PAREN:
451 case TT_COMMA:
452 ; //Can't appen.
454 i = l;
456 if(operands.empty())
457 throw std::runtime_error("Empty subexpression");
458 //Translate nonary operators to values.
459 for(auto& i : operands) {
460 if(!(bool)i.expr) {
461 auto fn = opset.find_operator(i.op, 0);
462 if(fn)
463 i.expr = gcroot_pointer<mathexpr>(gcroot_pointer_object_tag(), &_type, fn,
464 std::vector<gcroot_pointer<mathexpr>>());
467 //Check that there aren't two consequtive subexpressions and mark operators.
468 bool was_operand = false;
469 for(auto& i : operands) {
470 bool is_operand = (bool)i.expr;
471 if(!is_operand && !was_operand)
472 if(!(i.typei = opset.find_operator(i.op, 1)))
473 throw std::runtime_error("'" + i.op + "' is not an unary operator");
474 if(!is_operand && was_operand)
475 if(!(i.typei = opset.find_operator(i.op, 2)))
476 throw std::runtime_error("'" + i.op + "' is not a binary operator");
477 if(was_operand && is_operand)
478 throw std::runtime_error("Expected operator, got operand");
479 was_operand = is_operand;
481 if(!was_operand)
482 throw std::runtime_error("Expected operand, got end of subexpression");
483 //Okay, now the expression has been reduced into series of operators and subexpressions.
484 //If there are multiple consequtive operators, the first (except as first item) is binary,
485 //and the others are unary.
486 return parse_rec(_type, operands, 0, operands.size());
489 void tokenize(const std::string& expr, std::set<mathexpr_operinfo*>& operations,
490 std::vector<subexpression>& tokenization)
492 for(size_t i = 0; i < expr.length();) {
493 if(expr[i] == '(') {
494 tokenization.push_back(subexpression(TT_OPEN_PAREN));
495 i++;
496 } else if(expr[i] == ')') {
497 tokenization.push_back(subexpression(TT_CLOSE_PAREN));
498 i++;
499 } else if(expr[i] == ',') {
500 tokenization.push_back(subexpression(TT_COMMA));
501 i++;
502 } else if(expr[i] == ' ') {
503 i++;
504 } else if(expr[i] == '$') {
505 //Variable. If the next character is {, parse until }, otherwise parse until
506 //non-alphanum.
507 std::string varname = "";
508 if(i + 1 < expr.length() && expr[i + 1] == '{') {
509 //Terminate by '}'.
510 i++;
511 while(i + 1 < expr.length() && expr[i + 1] != '}')
512 varname += std::string(1, expr[++i]);
513 if(i + 1 >= expr.length() || expr[i + 1] != '}')
514 throw std::runtime_error("${ without matching }");
515 i++;
516 } else {
517 //Terminate by non-alphanum.
518 while(i + 1 < expr.length() && is_alphanumeric(expr[i + 1]))
519 varname += std::string(1, expr[++i]);
521 tokenization.push_back(subexpression(TT_VARIABLE, varname));
522 i++;
523 } else if(expr[i] == '"') {
524 bool escape = false;
525 size_t endpos = i;
526 endpos++;
527 while(endpos < expr.length() && (escape || expr[endpos] != '"')) {
528 if(!escape) {
529 if(expr[endpos] == '\\')
530 escape = true;
531 endpos++;
532 } else {
533 escape = false;
534 endpos++;
537 if(endpos == expr.length())
538 throw std::runtime_error("Unmatched \"");
539 //Copy (i,endpos-1) and descape.
540 std::string tmp;
541 escape = false;
542 for(size_t j = i + 1; j < endpos; j++) {
543 if(!escape) {
544 if(expr[j] != '\\')
545 tmp += std::string(1, expr[j]);
546 else
547 escape = true;
548 } else {
549 tmp += std::string(1, expr[j]);
550 escape = false;
553 tokenization.push_back(subexpression(TT_STRING, tmp));
554 i = endpos + 1;
555 } else {
556 bool found = false;
557 //Function names are only recognized if it begins here and is followed by '('.
558 for(auto j : operations) {
559 if(j->is_operator) continue; //Not a function.
560 if(i + j->fnname.length() + 1 > expr.length()) continue; //Too long.
561 if(expr[i + j->fnname.length()] != '(') continue; //Not followed by '('.
562 for(size_t k = 0; k < j->fnname.length(); k++)
563 if(expr[i + k] != j->fnname[k]) goto nomatch; //No match.
564 tokenization.push_back(subexpression(TT_FUNCTION, j->fnname));
565 i += j->fnname.length();
566 found = true;
567 break;
568 nomatch: ;
570 if(found) continue;
571 //Operators. These use longest match rule.
572 size_t longest_match = 0;
573 std::string op;
574 for(auto j : operations) {
575 if(!j->is_operator) continue; //Not an operator.
576 if(i + j->fnname.length() > expr.length()) continue; //Too long.
577 for(size_t k = 0; k < j->fnname.length(); k++)
578 if(expr[i + k] != j->fnname[k]) goto next; //No match.
579 if(j->fnname.length() <= longest_match) continue; //Not longest.
580 found = true;
581 op = j->fnname;
582 longest_match = op.length();
583 next: ;
585 if(found) {
586 tokenization.push_back(subexpression(TT_OPERATOR, op));
587 i += op.length();
588 continue;
590 //Okay, token until next non-alphanum.
591 std::string tmp;
592 while(i < expr.length() && is_alphanumeric(expr[i]))
593 tmp += std::string(1, expr[i++]);
594 if(tmp.length()) {
595 tokenization.push_back(subexpression(TT_VALUE, tmp));
596 continue;
598 std::string summary;
599 size_t j;
600 size_t utfcount = 0;
601 for(j = i; j < expr.length() && (j < i + 20 || utfcount); j++) {
602 if(utfcount) utfcount--;
603 summary += std::string(1, expr[j]);
604 if((uint8_t)expr[j] >= 0xF0) utfcount = 3;
605 if((uint8_t)expr[j] >= 0xE0) utfcount = 2;
606 if((uint8_t)expr[j] >= 0xC0) utfcount = 1;
607 if((uint8_t)expr[j] < 0x80) utfcount = 0;
609 if(j < expr.length()) summary += "[...]";
610 throw std::runtime_error("Expression parse error, at '" + summary + "'");
616 gcroot_pointer<mathexpr> mathexpr::parse(mathexpr_typeinfo& _type, const std::string& expr,
617 std::function<gcroot_pointer<mathexpr>(const std::string&)> vars)
619 if(expr == "")
620 throw std::runtime_error("Empty expression");
621 auto operations = _type.operations();
622 std::vector<subexpression> tokenization;
623 tokenize(expr, operations, tokenization);
624 return parse_rec(_type, tokenization, operations, vars, 0, tokenization.size());