tufte layout files:
[lyx.git] / src / mathed / MathExtern.cpp
blobf8f7cba3b1bf6a878e1f67d9a4d77a67bd4e7a4f
1 /**
2 * \file MathExtern.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author André Pönitz
8 * Full author contact details are available in file CREDITS.
9 */
11 // This file contains most of the magic that extracts "context
12 // information" from the unstructured layout-oriented stuff in
13 // MathData.
15 #include <config.h>
17 #include "MathExtern.h"
19 #include "InsetMathArray.h"
20 #include "InsetMathChar.h"
21 #include "InsetMathDelim.h"
22 #include "InsetMathDiff.h"
23 #include "InsetMathExFunc.h"
24 #include "InsetMathExInt.h"
25 #include "InsetMathFont.h"
26 #include "InsetMathFrac.h"
27 #include "InsetMathLim.h"
28 #include "InsetMathMatrix.h"
29 #include "InsetMathNumber.h"
30 #include "InsetMathScript.h"
31 #include "InsetMathString.h"
32 #include "InsetMathSymbol.h"
33 #include "MathData.h"
34 #include "MathParser.h"
35 #include "MathStream.h"
37 #include "support/debug.h"
38 #include "support/docstream.h"
39 #include "support/FileName.h"
40 #include "support/filetools.h"
41 #include "support/lstrings.h"
43 #include <algorithm>
44 #include <sstream>
45 #include <fstream>
46 #include <memory>
48 using namespace std;
49 using namespace lyx::support;
51 namespace lyx {
// Names that isKnownFunction() accepts as "well known" functions.
// The table ends with a null pointer so that it can be walked without
// a separate element count.
static char const * function_names[] = {
	"arccos", "arcsin", "arctan", "arg",
	"bmod", "cos", "cosh", "cot",
	"coth", "csc", "deg", "det",
	"dim", "exp", "gcd", "hom",
	"inf", "ker", "lg", "lim",
	"liminf", "limsup", "ln", "log",
	"max", "min", "sec", "sin",
	"sinh", "sup", "tan", "tanh",
	"Pr",
	0
};
62 static size_t const npos = lyx::docstring::npos;
64 // define a function for tests
65 typedef bool TestItemFunc(MathAtom const &);
67 // define a function for replacing subexpressions
68 typedef MathAtom ReplaceArgumentFunc(const MathData & ar);
72 // try to extract a super/subscript
73 // modify iterator position to point behind the thing
74 bool extractScript(MathData & ar,
75 MathData::iterator & pos, MathData::iterator last, bool superscript)
77 // nothing to get here
78 if (pos == last)
79 return false;
81 // is this a scriptinset?
82 if (!(*pos)->asScriptInset())
83 return false;
85 // do we want superscripts only?
86 if (superscript && !(*pos)->asScriptInset()->hasUp())
87 return false;
89 // it is a scriptinset, use it.
90 ar.push_back(*pos);
91 ++pos;
92 return true;
96 // try to extract an "argument" to some function.
97 // returns position behind the argument
98 MathData::iterator extractArgument(MathData & ar,
99 MathData::iterator pos, MathData::iterator last, bool function = false)
101 // nothing to get here
102 if (pos == last)
103 return pos;
105 // something delimited _is_ an argument
106 if ((*pos)->asDelimInset()) {
107 // leave out delimiters if this is a function argument
108 if (function) {
109 MathData const & arg = (*pos)->asDelimInset()->cell(0);
110 MathData::const_iterator cur = arg.begin();
111 MathData::const_iterator end = arg.end();
112 while (cur != end)
113 ar.push_back(*cur++);
114 } else
115 ar.push_back(*pos);
116 ++pos;
117 if (pos == last)
118 return pos;
119 // if there's one, get following superscript only if this
120 // isn't a function argument
121 if (!function)
122 extractScript(ar, pos, last, true);
123 return pos;
126 // always take the first thing, no matter what it is
127 ar.push_back(*pos);
129 // go ahead if possible
130 ++pos;
131 if (pos == last)
132 return pos;
134 // if the next item is a super/subscript, it most certainly belongs
135 // to the thing we have
136 extractScript(ar, pos, last, false);
137 if (pos == last)
138 return pos;
140 // but it might be more than that.
141 // FIXME: not implemented
142 //for (MathData::iterator it = pos + 1; it != last; ++it) {
143 // // always take the first thing, no matter
144 // if (it == pos) {
145 // ar.push_back(*it);
146 // continue;
147 // }
149 return pos;
153 // returns sequence of char with same code starting at it up to end
154 // it might be less, though...
155 docstring charSequence
156 (MathData::const_iterator it, MathData::const_iterator end)
158 docstring s;
159 for (; it != end && (*it)->asCharInset(); ++it)
160 s += (*it)->getChar();
161 return s;
165 void extractStrings(MathData & ar)
167 //lyxerr << "\nStrings from: " << ar << endl;
168 for (size_t i = 0; i < ar.size(); ++i) {
169 if (!ar[i]->asCharInset())
170 continue;
171 docstring s = charSequence(ar.begin() + i, ar.end());
172 ar[i] = MathAtom(new InsetMathString(s));
173 ar.erase(i + 1, i + s.size());
175 //lyxerr << "\nStrings to: " << ar << endl;
179 void extractMatrices(MathData & ar)
181 //lyxerr << "\nMatrices from: " << ar << endl;
182 // first pass for explicitly delimited stuff
183 for (size_t i = 0; i < ar.size(); ++i) {
184 if (!ar[i]->asDelimInset())
185 continue;
186 MathData const & arr = ar[i]->asDelimInset()->cell(0);
187 if (arr.size() != 1)
188 continue;
189 if (!arr.front()->asGridInset())
190 continue;
191 ar[i] = MathAtom(new InsetMathMatrix(*(arr.front()->asGridInset())));
194 // second pass for AMS "pmatrix" etc
195 for (size_t i = 0; i < ar.size(); ++i)
196 if (ar[i]->asAMSArrayInset())
197 ar[i] = MathAtom(new InsetMathMatrix(*(ar[i]->asGridInset())));
198 //lyxerr << "\nMatrices to: " << ar << endl;
202 // convert this inset somehow to a string
203 bool extractString(MathAtom const & at, docstring & str)
205 if (at->getChar()) {
206 str = docstring(1, at->getChar());
207 return true;
209 if (at->asStringInset()) {
210 str = at->asStringInset()->str();
211 return true;
213 return false;
217 // is this a known function?
218 bool isKnownFunction(docstring const & str)
220 for (int i = 0; function_names[i]; ++i) {
221 if (str == function_names[i])
222 return true;
224 return false;
228 // extract a function name from this inset
229 bool extractFunctionName(MathAtom const & at, docstring & str)
231 if (at->asSymbolInset()) {
232 str = at->asSymbolInset()->name();
233 return isKnownFunction(str);
235 if (at->asUnknownInset()) {
236 // assume it is well known...
237 str = at->name();
238 return true;
240 if (at->asFontInset() && at->name() == "mathrm") {
241 // assume it is well known...
242 MathData const & ar = at->asFontInset()->cell(0);
243 str = charSequence(ar.begin(), ar.end());
244 return ar.size() == str.size();
246 return false;
250 // convert this inset somehow to a number
251 bool extractNumber(MathData const & ar, int & i)
253 idocstringstream is(charSequence(ar.begin(), ar.end()));
254 is >> i;
255 return is;
259 bool extractNumber(MathData const & ar, double & d)
261 idocstringstream is(charSequence(ar.begin(), ar.end()));
262 is >> d;
263 return is;
267 bool testString(MathAtom const & at, docstring const & str)
269 docstring s;
270 return extractString(at, s) && str == s;
274 bool testString(MathAtom const & at, char const * const str)
276 return testString(at, from_ascii(str));
279 // search end of nested sequence
280 MathData::iterator endNestSearch(
281 MathData::iterator it,
282 MathData::iterator last,
283 TestItemFunc testOpen,
284 TestItemFunc testClose
287 for (int level = 0; it != last; ++it) {
288 if (testOpen(*it))
289 ++level;
290 if (testClose(*it))
291 --level;
292 if (level == 0)
293 break;
295 return it;
299 // replace nested sequences by a real Insets
300 void replaceNested(
301 MathData & ar,
302 TestItemFunc testOpen,
303 TestItemFunc testClose,
304 ReplaceArgumentFunc replaceArg)
306 Buffer * buf = ar.buffer();
307 // use indices rather than iterators for the loop because we are going
308 // to modify the array.
309 for (size_t i = 0; i < ar.size(); ++i) {
310 // check whether this is the begin of the sequence
311 if (!testOpen(ar[i]))
312 continue;
314 // search end of sequence
315 MathData::iterator it = ar.begin() + i;
316 MathData::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
317 if (jt == ar.end())
318 continue;
320 // replace the original stuff by the new inset
321 ar[i] = replaceArg(MathData(buf, it + 1, jt));
322 ar.erase(it + 1, jt + 1);
329 // split scripts into seperate super- and subscript insets. sub goes in
330 // front of super...
333 void splitScripts(MathData & ar)
335 Buffer * buf = ar.buffer();
336 //lyxerr << "\nScripts from: " << ar << endl;
337 for (size_t i = 0; i < ar.size(); ++i) {
338 InsetMathScript const * script = ar[i]->asScriptInset();
340 // is this a script inset and do we also have a superscript?
341 if (!script || !script->hasUp())
342 continue;
344 // we must have a nucleus if we only have a superscript
345 if (!script->hasDown() && script->nuc().size() == 0)
346 continue;
348 if (script->nuc().size() == 1) {
349 // leave alone sums and integrals
350 InsetMathSymbol const * sym =
351 script->nuc().front()->asSymbolInset();
352 if (sym && (sym->name() == "sum" || sym->name() == "int"))
353 continue;
356 // create extra script inset and move superscript over
357 InsetMathScript * p = ar[i].nucleus()->asScriptInset();
358 auto_ptr<InsetMathScript> q(new InsetMathScript(buf, true));
359 swap(q->up(), p->up());
360 p->removeScript(true);
362 // if we don't have a subscript, get rid of the ScriptInset
363 if (!script->hasDown()) {
364 MathData arg(p->nuc());
365 MathData::const_iterator it = arg.begin();
366 MathData::const_iterator et = arg.end();
367 ar.erase(i);
368 while (it != et)
369 ar.insert(i++, *it++);
370 } else
371 ++i;
373 // insert new inset behind
374 ar.insert(i, MathAtom(q.release()));
376 //lyxerr << "\nScripts to: " << ar << endl;
381 // extract exp(...)
384 void extractExps(MathData & ar)
386 Buffer * buf = ar.buffer();
387 //lyxerr << "\nExps from: " << ar << endl;
388 for (size_t i = 0; i + 1 < ar.size(); ++i) {
389 // is this 'e'?
390 if (ar[i]->getChar() != 'e')
391 continue;
393 // we need an exponent but no subscript
394 InsetMathScript const * sup = ar[i + 1]->asScriptInset();
395 if (!sup || sup->hasDown())
396 continue;
398 // create a proper exp-inset as replacement
399 ar[i] = MathAtom(new InsetMathExFunc(buf, from_ascii("exp"), sup->cell(1)));
400 ar.erase(i + 1);
402 //lyxerr << "\nExps to: " << ar << endl;
407 // extract det(...) from |matrix|
409 void extractDets(MathData & ar)
411 Buffer * buf = ar.buffer();
412 //lyxerr << "\ndet from: " << ar << endl;
413 for (MathData::iterator it = ar.begin(); it != ar.end(); ++it) {
414 InsetMathDelim const * del = (*it)->asDelimInset();
415 if (!del)
416 continue;
417 if (!del->isAbs())
418 continue;
419 *it = MathAtom(new InsetMathExFunc(buf, from_ascii("det"), del->cell(0)));
421 //lyxerr << "\ndet to: " << ar << endl;
426 // search numbers
429 bool isDigitOrSimilar(char_type c)
431 return ('0' <= c && c <= '9') || c == '.';
435 // returns sequence of digits
436 docstring digitSequence
437 (MathData::const_iterator it, MathData::const_iterator end)
439 docstring s;
440 for (; it != end && (*it)->asCharInset(); ++it) {
441 if (!isDigitOrSimilar((*it)->getChar()))
442 break;
443 s += (*it)->getChar();
445 return s;
449 void extractNumbers(MathData & ar)
451 //lyxerr << "\nNumbers from: " << ar << endl;
452 for (size_t i = 0; i < ar.size(); ++i) {
453 if (!ar[i]->asCharInset())
454 continue;
455 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
456 continue;
458 docstring s = digitSequence(ar.begin() + i, ar.end());
460 ar[i] = MathAtom(new InsetMathNumber(s));
461 ar.erase(i + 1, i + s.size());
463 //lyxerr << "\nNumbers to: " << ar << endl;
469 // search delimiters
472 bool testOpenParen(MathAtom const & at)
474 return testString(at, "(");
478 bool testCloseParen(MathAtom const & at)
480 return testString(at, ")");
484 MathAtom replaceParenDelims(const MathData & ar)
486 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
487 from_ascii("("), from_ascii(")"), ar));
491 bool testOpenBracket(MathAtom const & at)
493 return testString(at, "[");
497 bool testCloseBracket(MathAtom const & at)
499 return testString(at, "]");
503 MathAtom replaceBracketDelims(const MathData & ar)
505 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
506 from_ascii("["), from_ascii("]"), ar));
510 // replace '('...')' and '['...']' sequences by a real InsetMathDelim
511 void extractDelims(MathData & ar)
513 //lyxerr << "\nDelims from: " << ar << endl;
514 replaceNested(ar, testOpenParen, testCloseParen, replaceParenDelims);
515 replaceNested(ar, testOpenBracket, testCloseBracket, replaceBracketDelims);
516 //lyxerr << "\nDelims to: " << ar << endl;
522 // search well-known functions
526 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real InsetMathExFunc
527 // assume 'extractDelims' ran before
528 void extractFunctions(MathData & ar)
530 // we need at least two items...
531 if (ar.size() < 2)
532 return;
534 Buffer * buf = ar.buffer();
536 //lyxerr << "\nFunctions from: " << ar << endl;
537 for (size_t i = 0; i + 1 < ar.size(); ++i) {
538 MathData::iterator it = ar.begin() + i;
539 MathData::iterator jt = it + 1;
541 docstring name;
542 // is it a function?
543 // it certainly is if it is well known...
544 if (!extractFunctionName(*it, name)) {
545 // is this a user defined function?
546 // it it probably not, if it doesn't have a name.
547 if (!extractString(*it, name))
548 continue;
549 // it is not if it has no argument
550 if (jt == ar.end())
551 continue;
552 // guess so, if this is followed by
553 // a DelimInset with a single item in the cell
554 InsetMathDelim const * del = (*jt)->asDelimInset();
555 if (!del || del->cell(0).size() != 1)
556 continue;
557 // fall trough into main branch
560 // do we have an exponent like in
561 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
562 MathData exp;
563 extractScript(exp, jt, ar.end(), true);
565 // create a proper inset as replacement
566 auto_ptr<InsetMathExFunc> p(new InsetMathExFunc(buf, name));
568 // jt points to the "argument". Get hold of this.
569 MathData::iterator st = extractArgument(p->cell(0), jt, ar.end(), true);
571 // replace the function name by a real function inset
572 *it = MathAtom(p.release());
574 // remove the source of the argument from the array
575 ar.erase(it + 1, st);
577 // re-insert exponent
578 ar.insert(i + 1, exp);
579 //lyxerr << "\nFunctions to: " << ar << endl;
585 // search integrals
588 bool testSymbol(MathAtom const & at, docstring const & name)
590 return at->asSymbolInset() && at->asSymbolInset()->name() == name;
594 bool testSymbol(MathAtom const & at, char const * const name)
596 return at->asSymbolInset() && at->asSymbolInset()->name() == from_ascii(name);
600 bool testIntSymbol(MathAtom const & at)
602 return testSymbol(at, from_ascii("int"));
606 bool testIntegral(MathAtom const & at)
608 return
609 testIntSymbol(at) ||
610 ( at->asScriptInset()
611 && at->asScriptInset()->nuc().size()
612 && testIntSymbol(at->asScriptInset()->nuc().back()) );
617 bool testIntDiff(MathAtom const & at)
619 return testString(at, "d");
623 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real InsetMathExInt
624 // assume 'extractDelims' ran before
625 void extractIntegrals(MathData & ar)
627 // we need at least three items...
628 if (ar.size() < 3)
629 return;
631 Buffer * buf = ar.buffer();
633 //lyxerr << "\nIntegrals from: " << ar << endl;
634 for (size_t i = 0; i + 1 < ar.size(); ++i) {
635 MathData::iterator it = ar.begin() + i;
637 // search 'd'
638 MathData::iterator jt =
639 endNestSearch(it, ar.end(), testIntegral, testIntDiff);
641 // something sensible found?
642 if (jt == ar.end())
643 continue;
645 // is this a integral name?
646 if (!testIntegral(*it))
647 continue;
649 // core ist part from behind the scripts to the 'd'
650 auto_ptr<InsetMathExInt> p(new InsetMathExInt(buf, from_ascii("int")));
652 // handle scripts if available
653 if (!testIntSymbol(*it)) {
654 p->cell(2) = (*it)->asScriptInset()->down();
655 p->cell(3) = (*it)->asScriptInset()->up();
657 p->cell(0) = MathData(buf, it + 1, jt);
659 // use the "thing" behind the 'd' as differential
660 MathData::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
662 // remove used parts
663 ar.erase(it + 1, tt);
664 *it = MathAtom(p.release());
666 //lyxerr << "\nIntegrals to: " << ar << endl;
670 bool testTermDelimiter(MathAtom const & at)
672 return testString(at, "+") || testString(at, "-");
676 // try to extract a "term", i.e., something delimited by '+' or '-'.
677 // returns position behind the term
678 MathData::iterator extractTerm(MathData & ar,
679 MathData::iterator pos, MathData::iterator last)
681 while (pos != last && !testTermDelimiter(*pos)) {
682 ar.push_back(*pos);
683 ++pos;
685 return pos;
690 // search sums
694 bool testEqualSign(MathAtom const & at)
696 return testString(at, "=");
700 bool testSumSymbol(MathAtom const & p)
702 return testSymbol(p, from_ascii("sum"));
706 bool testSum(MathAtom const & at)
708 return
709 testSumSymbol(at) ||
710 ( at->asScriptInset()
711 && at->asScriptInset()->nuc().size()
712 && testSumSymbol(at->asScriptInset()->nuc().back()) );
716 // replace '\sum' ['_^'] f(x) sequences by a real InsetMathExInt
717 // assume 'extractDelims' ran before
718 void extractSums(MathData & ar)
720 // we need at least two items...
721 if (ar.size() < 2)
722 return;
724 Buffer * buf = ar.buffer();
726 //lyxerr << "\nSums from: " << ar << endl;
727 for (size_t i = 0; i + 1 < ar.size(); ++i) {
728 MathData::iterator it = ar.begin() + i;
730 // is this a sum name?
731 if (!testSum(ar[i]))
732 continue;
734 // create a proper inset as replacement
735 auto_ptr<InsetMathExInt> p(new InsetMathExInt(buf, from_ascii("sum")));
737 // collect lower bound and summation index
738 InsetMathScript const * sub = ar[i]->asScriptInset();
739 if (sub && sub->hasDown()) {
740 // try to figure out the summation index from the subscript
741 MathData const & ar = sub->down();
742 MathData::const_iterator xt =
743 find_if(ar.begin(), ar.end(), &testEqualSign);
744 if (xt != ar.end()) {
745 // we found a '=', use everything in front of that as index,
746 // and everything behind as lower index
747 p->cell(1) = MathData(buf, ar.begin(), xt);
748 p->cell(2) = MathData(buf, xt + 1, ar.end());
749 } else {
750 // use everything as summation index, don't use scripts.
751 p->cell(1) = ar;
755 // collect upper bound
756 if (sub && sub->hasUp())
757 p->cell(3) = sub->up();
759 // use something behind the script as core
760 MathData::iterator tt = extractTerm(p->cell(0), it + 1, ar.end());
762 // cleanup
763 ar.erase(it + 1, tt);
764 *it = MathAtom(p.release());
766 //lyxerr << "\nSums to: " << ar << endl;
771 // search differential stuff
774 // tests for 'd' or '\partial'
775 bool testDiffItem(MathAtom const & at)
777 if (testString(at, "d") || testSymbol(at, "partial"))
778 return true;
780 // we may have d^n .../d and splitScripts() has not yet seen it
781 InsetMathScript const * sup = at->asScriptInset();
782 if (sup && !sup->hasDown() && sup->hasUp() && sup->nuc().size() == 1) {
783 MathAtom const & ma = sup->nuc().front();
784 return testString(ma, "d") || testSymbol(ma, "partial");
786 return false;
790 bool testDiffArray(MathData const & ar)
792 return ar.size() && testDiffItem(ar.front());
796 bool testDiffFrac(MathAtom const & at)
798 return
799 at->asFracInset()
800 && testDiffArray(at->asFracInset()->cell(0))
801 && testDiffArray(at->asFracInset()->cell(1));
805 void extractDiff(MathData & ar)
807 Buffer * buf = ar.buffer();
808 //lyxerr << "\nDiffs from: " << ar << endl;
809 for (size_t i = 0; i < ar.size(); ++i) {
810 MathData::iterator it = ar.begin() + i;
812 // is this a "differential fraction"?
813 if (!testDiffFrac(*it))
814 continue;
816 InsetMathFrac const * f = (*it)->asFracInset();
817 if (!f) {
818 lyxerr << "should not happen" << endl;
819 continue;
822 // create a proper diff inset
823 auto_ptr<InsetMathDiff> diff(new InsetMathDiff(buf));
825 // collect function, let jt point behind last used item
826 MathData::iterator jt = it + 1;
827 //int n = 1;
828 MathData numer(f->cell(0));
829 splitScripts(numer);
830 if (numer.size() > 1 && numer[1]->asScriptInset()) {
831 // this is something like d^n f(x) / d... or d^n / d...
832 // FIXME
833 //n = 1;
834 if (numer.size() > 2)
835 diff->cell(0) = MathData(buf, numer.begin() + 2, numer.end());
836 else
837 jt = extractTerm(diff->cell(0), jt, ar.end());
838 } else {
839 // simply d f(x) / d... or d/d...
840 if (numer.size() > 1)
841 diff->cell(0) = MathData(buf, numer.begin() + 1, numer.end());
842 else
843 jt = extractTerm(diff->cell(0), jt, ar.end());
846 // collect denominator parts
847 MathData denom(f->cell(1));
848 splitScripts(denom);
849 for (MathData::iterator dt = denom.begin(); dt != denom.end();) {
850 // find the next 'd'
851 MathData::iterator et
852 = find_if(dt + 1, denom.end(), &testDiffItem);
854 // point before this
855 MathData::iterator st = et - 1;
856 InsetMathScript const * script = (*st)->asScriptInset();
857 if (script && script->hasUp()) {
858 // things like d.../dx^n
859 int mult = 1;
860 if (extractNumber(script->up(), mult)) {
861 //lyxerr << "mult: " << mult << endl;
862 for (int i = 0; i < mult; ++i)
863 diff->addDer(MathData(buf, dt + 1, st));
865 } else {
866 // just d.../dx
867 diff->addDer(MathData(buf, dt + 1, et));
869 dt = et;
872 // cleanup
873 ar.erase(it + 1, jt);
874 *it = MathAtom(diff.release());
876 //lyxerr << "\nDiffs to: " << ar << endl;
881 // search limits
885 bool testRightArrow(MathAtom const & at)
887 return testSymbol(at, "to") || testSymbol(at, "rightarrow");
892 // replace '\lim_{x->x0} f(x)' sequences by a real InsetMathLim
893 // assume 'extractDelims' ran before
894 void extractLims(MathData & ar)
896 Buffer * buf = ar.buffer();
897 //lyxerr << "\nLimits from: " << ar << endl;
898 for (size_t i = 0; i < ar.size(); ++i) {
899 MathData::iterator it = ar.begin() + i;
901 // must be a script inset with a subscript (without superscript)
902 InsetMathScript const * sub = (*it)->asScriptInset();
903 if (!sub || !sub->hasDown() || sub->hasUp() || sub->nuc().size() != 1)
904 continue;
906 // is this a limit function?
907 if (!testSymbol(sub->nuc().front(), "lim"))
908 continue;
910 // subscript must contain a -> symbol
911 MathData const & s = sub->down();
912 MathData::const_iterator st = find_if(s.begin(), s.end(), &testRightArrow);
913 if (st == s.end())
914 continue;
916 // the -> splits the subscript int x and x0
917 MathData x = MathData(buf, s.begin(), st);
918 MathData x0 = MathData(buf, st + 1, s.end());
920 // use something behind the script as core
921 MathData f;
922 MathData::iterator tt = extractTerm(f, it + 1, ar.end());
924 // cleanup
925 ar.erase(it + 1, tt);
927 // create a proper inset as replacement
928 *it = MathAtom(new InsetMathLim(buf, f, x, x0));
930 //lyxerr << "\nLimits to: " << ar << endl;
935 // combine searches
938 void extractStructure(MathData & ar)
940 //lyxerr << "\nStructure from: " << ar << endl;
941 splitScripts(ar);
942 extractDelims(ar);
943 extractIntegrals(ar);
944 extractSums(ar);
945 extractNumbers(ar);
946 extractMatrices(ar);
947 extractFunctions(ar);
948 extractDets(ar);
949 extractDiff(ar);
950 extractExps(ar);
951 extractLims(ar);
952 extractStrings(ar);
953 //lyxerr << "\nStructure to: " << ar << endl;
957 void write(MathData const & dat, WriteStream & wi)
959 MathData ar = dat;
960 extractStrings(ar);
961 wi.firstitem() = true;
962 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) {
963 (*it)->write(wi);
964 wi.firstitem() = false;
969 void normalize(MathData const & ar, NormalStream & os)
971 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
972 (*it)->normalize(os);
976 void octave(MathData const & dat, OctaveStream & os)
978 MathData ar = dat;
979 extractStructure(ar);
980 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
981 (*it)->octave(os);
985 void maple(MathData const & dat, MapleStream & os)
987 MathData ar = dat;
988 extractStructure(ar);
989 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
990 (*it)->maple(os);
994 void maxima(MathData const & dat, MaximaStream & os)
996 MathData ar = dat;
997 extractStructure(ar);
998 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
999 (*it)->maxima(os);
1003 void mathematica(MathData const & dat, MathematicaStream & os)
1005 MathData ar = dat;
1006 extractStructure(ar);
1007 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
1008 (*it)->mathematica(os);
1012 void mathmlize(MathData const & dat, MathStream & os)
1014 MathData ar = dat;
1015 extractStructure(ar);
1016 if (ar.size() == 0)
1017 os << "<mrow/>";
1018 else if (ar.size() == 1)
1019 os << ar.front();
1020 else {
1021 os << MTag("mrow");
1022 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
1023 (*it)->mathmlize(os);
1024 os << ETag("mrow");
1031 namespace {
1033 string captureOutput(string const & cmd, string const & data)
1035 // In order to avoid parsing problems with command interpreters
1036 // we pass input data through a file
1037 FileName const cas_tmpfile = FileName::tempName("casinput");
1038 if (cas_tmpfile.empty()) {
1039 lyxerr << "Warning: cannot create temporary file."
1040 << endl;
1041 return string();
1043 ofstream os(cas_tmpfile.toFilesystemEncoding().c_str());
1044 os << data << endl;
1045 os.close();
1046 string command = cmd + " < "
1047 + quoteName(cas_tmpfile.toFilesystemEncoding());
1048 lyxerr << "calling: " << cmd
1049 << "\ninput: '" << data << "'" << endl;
1050 cmd_ret const ret = runCommand(command);
1051 cas_tmpfile.removeFile();
1052 return ret.second;
// Returns the position of the '}' that closes the brace group opened at
// position 'i' of 'str', or npos if there is none. The search starts
// behind 'i' with one group already counted as open.
size_t get_matching_brace(std::string const & str, size_t i)
{
	size_t const len = str.size();
	int depth = 1;
	for (size_t at = i; at < len; ) {
		at = str.find_first_of("{}", at + 1);
		if (at == std::string::npos)
			break;
		depth += (str[at] == '{') ? 1 : -1;
		if (depth == 0)
			return at;
	}
	return std::string::npos;
}
// Returns the position of the '{' that opens the brace group closed at
// position 'i' of 'str', or npos if there is none. The search runs
// backwards from in front of 'i' with one group already counted as
// closed.
size_t get_matching_brace_back(std::string const & str, size_t i)
{
	// A caller computing "i - 1" for i == 0 hands us npos. There is
	// nothing in front of the string, so there is no match; without this
	// test the search would wrap around and scan the whole string.
	if (i == std::string::npos)
		return std::string::npos;

	int count = 1;
	while (i > 0) {
		i = str.find_last_of("{}", i - 1);
		if (i == std::string::npos)
			return i;
		if (str[i] == '}')
			++count;
		else
			--count;
		if (count == 0)
			return i;
	}
	return std::string::npos;
}
1090 MathData pipeThroughMaxima(docstring const &, MathData const & ar)
1092 odocstringstream os;
1093 MaximaStream ms(os);
1094 ms << ar;
1095 docstring expr = os.str();
1096 docstring const header = from_ascii("simpsum:true;");
1098 string out;
1099 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1100 // try to fix missing '*' the hard way
1102 // > echo "2x;" | maxima
1103 // ...
1104 // (C1) Incorrect syntax: x is not an infix operator
1105 // 2x;
1106 // ^
1108 lyxerr << "checking expr: '" << to_utf8(expr) << "'" << endl;
1109 docstring full = header + "tex(" + expr + ");";
1110 out = captureOutput("maxima", to_utf8(full));
1112 // leave loop if expression syntax is probably ok
1113 if (out.find("Incorrect syntax") == npos)
1114 break;
1116 // search line with "Incorrect syntax"
1117 istringstream is(out);
1118 string line;
1119 while (is) {
1120 getline(is, line);
1121 if (line.find("Incorrect syntax") != npos)
1122 break;
1125 // 2nd next line is the one with caret
1126 getline(is, line);
1127 getline(is, line);
1128 size_t pos = line.find('^');
1129 lyxerr << "found caret at pos: '" << pos << "'" << endl;
1130 if (pos == npos || pos < 4)
1131 break; // caret position not found
1132 pos -= 4; // skip the "tex(" part
1133 if (expr[pos] == '*')
1134 break; // two '*' in a row are definitely bad
1135 expr.insert(pos, from_ascii("*"));
1138 vector<string> tmp = getVectorFromString(out, "$$");
1139 if (tmp.size() < 2)
1140 return MathData();
1142 out = subst(tmp[1], "\\>", string());
1143 lyxerr << "output: '" << out << "'" << endl;
1145 // Ugly code that tries to make the result prettier
1146 size_t i = out.find("\\mathchoice");
1147 while (i != npos) {
1148 size_t j = get_matching_brace(out, i + 12);
1149 size_t k = get_matching_brace(out, j + 1);
1150 k = get_matching_brace(out, k + 1);
1151 k = get_matching_brace(out, k + 1);
1152 string mid = out.substr(i + 13, j - i - 13);
1153 if (mid.find("\\over") != npos)
1154 mid = '{' + mid + '}';
1155 out = out.substr(0, i)
1156 + mid
1157 + out.substr(k + 1);
1158 //lyxerr << "output: " << out << endl;
1159 i = out.find("\\mathchoice", i);
1160 break;
1163 i = out.find("\\over");
1164 while (i != npos) {
1165 size_t j = get_matching_brace_back(out, i - 1);
1166 if (j == npos || j == 0)
1167 break;
1168 size_t k = get_matching_brace(out, i + 5);
1169 if (k == npos || k + 1 == out.size())
1170 break;
1171 out = out.substr(0, j - 1)
1172 + "\\frac"
1173 + out.substr(j, i - j)
1174 + out.substr(i + 5, k - i - 4)
1175 + out.substr(k + 2);
1176 //lyxerr << "output: " << out << endl;
1177 i = out.find("\\over", i + 4);
1179 MathData res;
1180 mathed_parse_cell(res, from_utf8(out));
1181 return res;
1185 MathData pipeThroughMaple(docstring const & extra, MathData const & ar)
1187 string header = "readlib(latex):\n";
1189 // remove the \\it for variable names
1190 //"#`latex/csname_font` := `\\it `:"
1191 header +=
1192 "`latex/csname_font` := ``:\n";
1194 // export matrices in (...) instead of [...]
1195 header +=
1196 "`latex/latex/matrix` := "
1197 "subs(`[`=`(`, `]`=`)`,"
1198 "eval(`latex/latex/matrix`)):\n";
1200 // replace \\cdots with proper '*'
1201 header +=
1202 "`latex/latex/*` := "
1203 "subs(`\\,`=`\\cdot `,"
1204 "eval(`latex/latex/*`)):\n";
1206 // remove spurious \\noalign{\\medskip} in matrix output
1207 header +=
1208 "`latex/latex/matrix`:= "
1209 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
1210 "eval(`latex/latex/matrix`)):\n";
1212 //"#`latex/latex/symbol` "
1213 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
1215 string trailer = "quit;";
1216 odocstringstream os;
1217 MapleStream ms(os);
1218 ms << ar;
1219 string expr = to_utf8(os.str());
1220 lyxerr << "ar: '" << ar << "'\n"
1221 << "ms: '" << expr << "'" << endl;
1223 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1224 // try to fix missing '*' the hard way by using mint
1226 // ... > echo "1A;" | mint -i 1 -S -s -q
1227 // on line 1: 1A;
1228 // ^ syntax error -
1229 // Probably missing an operator such as * p
1231 lyxerr << "checking expr: '" << expr << "'" << endl;
1232 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ';');
1233 if (out.empty())
1234 break; // expression syntax is ok
1235 istringstream is(out);
1236 string line;
1237 getline(is, line);
1238 if (line.find("on line") != 0)
1239 break; // error message not identified
1240 getline(is, line);
1241 size_t pos = line.find('^');
1242 if (pos == string::npos || pos < 15)
1243 break; // caret position not found
1244 pos -= 15; // skip the "on line ..." part
1245 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
1246 break; // two '*' in a row are definitely bad
1247 expr.insert(pos, 1, '*');
1250 // FIXME UNICODE Is utf8 encoding correct?
1251 string full = "latex(" + to_utf8(extra) + '(' + expr + "));";
1252 string out = captureOutput("maple -q", header + full + trailer);
1254 // change \_ into _
1257 MathData res;
1258 mathed_parse_cell(res, from_utf8(out));
1259 return res;
1263 MathData pipeThroughOctave(docstring const &, MathData const & ar)
1265 odocstringstream os;
1266 OctaveStream vs(os);
1267 vs << ar;
1268 string expr = to_utf8(os.str());
1269 string out;
1270 // FIXME const cast
1271 Buffer * buf = const_cast<Buffer *>(ar.buffer());
1272 lyxerr << "pipe: ar: '" << ar << "'\n"
1273 << "pipe: expr: '" << expr << "'" << endl;
1275 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1277 // try to fix missing '*' the hard way
1278 // parse error:
1279 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
1280 // ^
1282 lyxerr << "checking expr: '" << expr << "'" << endl;
1283 out = captureOutput("octave -q 2>&1", expr);
1284 lyxerr << "output: '" << out << "'" << endl;
1286 // leave loop if expression syntax is probably ok
1287 if (out.find("parse error:") == string::npos)
1288 break;
1290 // search line with single caret
1291 istringstream is(out);
1292 string line;
1293 while (is) {
1294 getline(is, line);
1295 lyxerr << "skipping line: '" << line << "'" << endl;
1296 if (line.find(">>> ") != string::npos)
1297 break;
1300 // found line with error, next line is the one with caret
1301 getline(is, line);
1302 size_t pos = line.find('^');
1303 lyxerr << "caret line: '" << line << "'" << endl;
1304 lyxerr << "found caret at pos: '" << pos << "'" << endl;
1305 if (pos == string::npos || pos < 4)
1306 break; // caret position not found
1307 pos -= 4; // skip the ">>> " part
1308 if (expr[pos] == '*')
1309 break; // two '*' in a row are definitely bad
1310 expr.insert(pos, 1, '*');
1313 // remove 'ans = ' taking into account that there may be an
1314 // ansi control sequence before, such as '\033[?1034hans = '
1315 size_t i = out.find("ans = ");
1316 if (i == string::npos)
1317 return MathData();
1318 out = out.substr(i + 6);
1320 // parse output as matrix or single number
1321 MathAtom at(new InsetMathArray(buf, from_ascii("array"), from_utf8(out)));
1322 InsetMathArray const * mat = at->asArrayInset();
1323 MathData res(buf);
1324 if (mat->ncols() == 1 && mat->nrows() == 1)
1325 res.append(mat->cell(0));
1326 else {
1327 res.push_back(MathAtom(
1328 new InsetMathDelim(buf, from_ascii("("), from_ascii(")"))));
1329 res.back().nucleus()->cell(0).push_back(at);
1331 return res;
/**
 * Translate a Mathematica function name into its lower-case LaTeX
 * counterpart (e.g. "ArcSin" -> "arcsin").
 *
 * The comparison is exact and case sensitive; a name that is not in
 * the table is returned unchanged.
 */
string fromMathematicaName(string const & name)
{
	// { Mathematica spelling, replacement }
	static char const * const translations[][2] = {
		{ "Sin",    "sin"    },
		{ "Sinh",   "sinh"   },
		{ "ArcSin", "arcsin" },
		{ "Cos",    "cos"    },
		{ "Cosh",   "cosh"   },
		{ "ArcCos", "arccos" },
		{ "Tan",    "tan"    },
		{ "Tanh",   "tanh"   },
		{ "ArcTan", "arctan" },
		{ "Cot",    "cot"    },
		{ "Coth",   "coth"   },
		{ "Csc",    "csc"    },
		{ "Sec",    "sec"    },
		{ "Exp",    "exp"    },
		{ "Log",    "log"    },
		{ "Arg",    "arg"    },
		{ "Det",    "det"    },
		{ "GCD",    "gcd"    },
		{ "Max",    "max"    },
		{ "Min",    "min"    },
		{ "Erf",    "erf"    },
		{ "Erfc",   "erfc"   }
	};
	size_t const count = sizeof(translations) / sizeof(translations[0]);

	for (size_t k = 0; k != count; ++k) {
		if (name == translations[k][0])
			return translations[k][1];
	}
	// unknown names pass through untouched
	return name;
}
1363 void prettifyMathematicaOutput(string & out, string const & macroName,
1364 bool roman, bool translate)
1366 string const macro = "\\" + macroName + "{";
1367 size_t const len = macro.length();
1368 size_t i = out.find(macro);
1370 while (i != npos) {
1371 size_t const j = get_matching_brace(out, i + len);
1372 string const name = out.substr(i + len, j - i - len);
1373 out = out.substr(0, i)
1374 + (roman ? "\\mathrm{" : "")
1375 + (translate ? fromMathematicaName(name) : name)
1376 + out.substr(roman ? j : j + 1);
1377 //lyxerr << "output: " << out << endl;
1378 i = out.find(macro, i);
1383 MathData pipeThroughMathematica(docstring const &, MathData const & ar)
1385 odocstringstream os;
1386 MathematicaStream ms(os);
1387 ms << ar;
1388 // FIXME UNICODE Is utf8 encoding correct?
1389 string const expr = to_utf8(os.str());
1390 string out;
1392 lyxerr << "expr: '" << expr << "'" << endl;
1394 string const full = "TeXForm[" + expr + "]";
1395 out = captureOutput("math", full);
1396 lyxerr << "output: '" << out << "'" << endl;
1398 size_t pos1 = out.find("Out[1]//TeXForm= ");
1399 size_t pos2 = out.find("In[2]:=");
1401 if (pos1 == string::npos || pos2 == string::npos)
1402 return MathData();
1404 // get everything from pos1+17 to pos2
1405 out = out.substr(pos1 + 17, pos2 - pos1 - 17);
1406 out = subst(subst(out, '\r', ' '), '\n', ' ');
1408 // tries to make the result prettier
1409 prettifyMathematicaOutput(out, "Mfunction", true, true);
1410 prettifyMathematicaOutput(out, "Muserfunction", true, false);
1411 prettifyMathematicaOutput(out, "Mvariable", false, false);
1413 MathData res;
1414 mathed_parse_cell(res, from_utf8(out));
1415 return res;
1421 MathData pipeThroughExtern(string const & lang, docstring const & extra,
1422 MathData const & ar)
1424 if (lang == "octave")
1425 return pipeThroughOctave(extra, ar);
1427 if (lang == "maxima")
1428 return pipeThroughMaxima(extra, ar);
1430 if (lang == "maple")
1431 return pipeThroughMaple(extra, ar);
1433 if (lang == "mathematica")
1434 return pipeThroughMathematica(extra, ar);
1436 // create normalized expression
1437 odocstringstream os;
1438 NormalStream ns(os);
1439 os << '[' << extra << ' ';
1440 ns << ar;
1441 os << ']';
1442 // FIXME UNICODE Is utf8 encoding correct?
1443 string data = to_utf8(os.str());
1445 // search external script
1446 FileName const file = libFileSearch("mathed", "extern_" + lang);
1447 if (file.empty()) {
1448 lyxerr << "converter to '" << lang << "' not found" << endl;
1449 return MathData();
1452 // run external sript
1453 string out = captureOutput(file.absFilename(), data);
1454 MathData res;
1455 mathed_parse_cell(res, from_utf8(out));
1456 return res;
1460 } // namespace lyx