API review: QRegExp::numCaptures() -> QRegExp::captureCount()
[qt-netbsd.git] / util / lexgen / generator.cpp
blobfce405f51bf87e206222b3d46a91c5bd894295a4
1 /****************************************************************************
2 **
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4 ** All rights reserved.
5 ** Contact: Nokia Corporation (qt-info@nokia.com)
6 **
7 ** This file is part of the utils of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** No Commercial Usage
11 ** This file contains pre-release code and may not be distributed.
12 ** You may use this file in accordance with the terms and conditions
13 ** contained in the Technology Preview License Agreement accompanying
14 ** this package.
16 ** GNU Lesser General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU Lesser
18 ** General Public License version 2.1 as published by the Free Software
19 ** Foundation and appearing in the file LICENSE.LGPL included in the
20 ** packaging of this file. Please review the following information to
21 ** ensure the GNU Lesser General Public License version 2.1 requirements
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 ** In addition, as a special exception, Nokia gives you certain additional
25 ** rights. These rights are described in the Nokia Qt LGPL Exception
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 ** If you have questions regarding the use of this file, please contact
29 ** Nokia at qt-info@nokia.com.
38 ** $QT_END_LICENSE$
40 ****************************************************************************/
42 #include "generator.h"
44 #include <QFile>
46 void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
48 block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
49 if (!iline)
50 return;
52 block.indent();
53 QString tmp = body;
54 if (tmp.endsWith(QLatin1Char('\n')))
55 tmp.chop(1);
56 foreach (QString line, tmp.split(QLatin1Char('\n')))
57 block << line;
58 block.outdent();
59 block << "}";
62 QString Function::signature(const QString &funcNamePrefix) const
64 QString sig;
65 if (!rtype.isEmpty()) {
66 sig += rtype;
67 sig += QLatin1Char(' ');
69 sig += funcNamePrefix;
70 sig += fname;
71 if (cnst)
72 sig += " const";
73 return sig;
76 QString Function::definition() const
78 if (iline)
79 return QString();
81 QString result;
82 result += signature();
83 result += QLatin1String("\n{\n");
85 QString tmp = body;
87 if (tmp.endsWith(QLatin1Char('\n')))
88 tmp.chop(1);
89 if (!tmp.startsWith(QLatin1Char('\n')))
90 tmp.prepend(" ");
92 tmp.replace(QLatin1Char('\n'), QLatin1String("\n "));
94 result += tmp;
96 result += QLatin1String("\n}\n");
98 return result;
101 void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
103 foreach (Function ctor, constructors)
104 ctor.printDeclaration(block, klass->name());
106 if (!constructors.isEmpty())
107 block.addNewLine();
109 foreach (Function func, functions)
110 func.printDeclaration(block);
112 if (!functions.isEmpty())
113 block.addNewLine();
115 foreach (QString var, variables)
116 block << var << ';';
119 void Class::addConstructor(Access access, const QString &body, const QString &_args)
121 Function ctor;
122 QString args = _args;
123 if (!args.startsWith(QLatin1Char('('))
124 && !args.endsWith(QLatin1Char(')'))) {
125 args.prepend('(');
126 args.append(')');
128 ctor.setName(args);
129 ctor.addBody(body);
130 sections[access].constructors.append(ctor);
133 QString Class::Section::definition(const Class *klass) const
135 QString result;
137 foreach (Function ctor, constructors) {
138 ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
139 result += ctor.definition();
140 result += QLatin1Char('\n');
143 foreach (Function func, functions) {
144 if (!func.hasBody()) continue;
145 func.setName(klass->name() + "::" + func.name());
146 result += func.definition();
147 result += QLatin1Char('\n');
150 return result;
153 QString Class::declaration() const
155 CodeBlock block;
157 block << QLatin1String("class ") << cname;
158 block << "{";
160 if (!sections[PublicMember].isEmpty()) {
161 block << "public:";
162 block.indent();
163 sections[PublicMember].printDeclaration(this, block);
164 block.outdent();
167 if (!sections[ProtectedMember].isEmpty()) {
168 block << "protected:";
169 block.indent();
170 sections[ProtectedMember].printDeclaration(this, block);
171 block.outdent();
174 if (!sections[PrivateMember].isEmpty()) {
175 block << "private:";
176 block.indent();
177 sections[PrivateMember].printDeclaration(this, block);
178 block.outdent();
181 block << "};";
182 block.addNewLine();
184 return block.toString();
187 QString Class::definition() const
189 return sections[PrivateMember].definition(this)
190 + sections[ProtectedMember].definition(this)
191 + sections[PublicMember].definition(this);
194 Generator::Generator(const DFA &_dfa, const Config &config)
195 : dfa(_dfa), cfg(config)
197 QList<InputType> lst = cfg.maxInputSet.toList();
198 qSort(lst);
199 minInput = lst.first();
200 maxInput = lst.last();
202 ConfigFile::Section section = config.configSections.value("Code Generator Options");
204 foreach (ConfigFile::Entry entry, section) {
205 if (!entry.key.startsWith(QLatin1String("MapToCode["))
206 || !entry.key.endsWith(QLatin1Char(']')))
207 continue;
208 QString range = entry.key;
209 range.remove(0, qstrlen("MapToCode["));
210 range.chop(1);
211 if (range.length() != 3
212 || range.at(1) != QLatin1Char('-')) {
213 qWarning("Invalid range for char mapping function: %s", qPrintable(range));
214 continue;
216 TransitionSequence seq;
217 seq.first = range.at(0).unicode();
218 seq.last = range.at(2).unicode();
219 seq.testFunction = entry.value;
220 charFunctionRanges.append(seq);
223 QString tokenPrefix = section.value("TokenPrefix");
224 if (!tokenPrefix.isEmpty()) {
225 for (int i = 0; i < dfa.count(); ++i)
226 if (!dfa.at(i).symbol.isEmpty()
227 && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
228 dfa[i].symbol.prepend(tokenPrefix);
231 headerFileName = section.value("FileHeader");
234 static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
235 //static inline bool adjacentKeys(const InputType &left, const InputType &right)
236 //{ return left.val + 1 == right.val; }
238 static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
240 QVector<Generator::TransitionSequence> sequences;
241 if (transitions.isEmpty())
242 return sequences;
244 QList<InputType> keys = transitions.keys();
245 qSort(keys);
246 int i = 0;
247 Generator::TransitionSequence sequence;
248 sequence.first = keys.at(0);
249 ++i;
250 for (; i < keys.count(); ++i) {
251 if (adjacentKeys(keys.at(i - 1), keys.at(i))
252 && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
253 continue;
255 sequence.last = keys.at(i - 1);
256 sequence.transition = transitions.value(sequence.last);
257 sequences.append(sequence);
259 sequence.first = keys.at(i);
261 sequence.last = keys.at(i - 1);
262 sequence.transition = transitions.value(sequence.last);
263 sequences.append(sequence);
265 return sequences;
268 QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
270 return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
271 << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
272 << "]";
275 bool Generator::isSingleReferencedFinalState(int i) const
277 return backReferenceMap.value(i) == 1
278 && dfa.at(i).transitions.isEmpty()
279 && !dfa.at(i).symbol.isEmpty();
282 void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
284 if (transitions.isEmpty())
285 return;
287 QVector<TransitionSequence> sequences = convertToSequences(transitions);
289 bool needsCharFunction = false;
290 if (!charFunctionRanges.isEmpty()) {
291 int i = 0;
292 while (i < sequences.count()) {
293 const TransitionSequence &seq = sequences.at(i);
294 if (!seq.testFunction.isEmpty()) {
295 ++i;
296 continue;
299 foreach (TransitionSequence range, charFunctionRanges)
300 if (range.first >= seq.first && range.last <= seq.last) {
301 needsCharFunction = true;
303 TransitionSequence left, middle, right;
305 left.first = seq.first;
306 left.last = range.first - 1;
307 left.transition = seq.transition;
309 middle = range;
310 middle.transition = seq.transition;
312 right.first = range.last + 1;
313 right.last = seq.last;
314 right.transition = seq.transition;
316 sequences.remove(i);
317 if (left.last >= left.first) {
318 sequences.insert(i, left);
319 ++i;
321 sequences.insert(i, middle);
322 ++i;
323 if (right.last >= right.first) {
324 sequences.insert(i, right);
325 ++i;
328 i = -1;
329 break;
332 ++i;
336 //qDebug() << "sequence count" << sequences.count();
337 //qDebug() << sequences;
339 if (sequences.count() < 10
340 || sequences.last().last == maxInput
341 || needsCharFunction) {
342 foreach (TransitionSequence seq, sequences) {
343 const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
345 QString brace;
346 if (embedFinalState)
347 brace = " {";
349 if (!seq.testFunction.isEmpty()) {
350 body << "if (" << seq.testFunction << ")" << brace;
351 } else if (seq.first == seq.last) {
352 body << "if (ch.unicode() == " << seq.first << ")" << brace;
353 } else {
354 if (seq.last < maxInput)
355 body << "if (ch.unicode() >= " << seq.first
356 << " && ch.unicode() <= " << seq.last << ")" << brace;
357 else
358 body << "if (ch.unicode() >= " << seq.first << ")" << brace;
360 body.indent();
361 if (embedFinalState) {
362 body << "token = " << dfa.at(seq.transition).symbol << ";";
363 body << "goto found;";
365 body.outdent();
366 body << "}";
367 } else {
368 body << "goto state_" << seq.transition << ";";
369 body.outdent();
372 } else {
373 QList<InputType> keys = transitions.keys();
374 qSort(keys);
376 body << "switch (ch.unicode()) {";
377 body.indent();
378 for (int k = 0; k < keys.count(); ++k) {
379 const InputType key = keys.at(k);
380 const int trans = transitions.value(key);
382 QString keyStr;
383 if (key == '\\')
384 keyStr = QString("\'\\\\\'");
385 else if (key >= 48 && key < 127)
386 keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
387 else
388 keyStr = QString::number(key);
390 if (k < keys.count() - 1
391 && transitions.value(keys.at(k + 1)) == trans) {
392 body << "case " << keyStr << ":";
393 } else {
394 if (isSingleReferencedFinalState(trans)) {
395 body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
396 } else {
397 body << "case " << keyStr << ": goto state_" << trans << ";";
401 body.outdent();
402 body << "}";
406 QString Generator::generate()
408 Class klass(cfg.className);
410 klass.addMember(Class::PublicMember, "QString input");
411 klass.addMember(Class::PublicMember, "int pos");
412 klass.addMember(Class::PublicMember, "int lexemStart");
413 klass.addMember(Class::PublicMember, "int lexemLength");
416 CodeBlock body;
417 body << "input = inp;";
418 body << "pos = 0;";
419 body << "lexemStart = 0;";
420 body << "lexemLength = 0;";
421 klass.addConstructor(Class::PublicMember, body, "const QString &inp");
425 Function next("QChar", "next()");
426 next.setInline(true);
427 if (cfg.caseSensitivity == Qt::CaseSensitive)
428 next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
429 else
430 next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
431 klass.addMember(Class::PublicMember, next);
436 Function lexem("QString", "lexem()");
437 lexem.setConst(true);
438 lexem.setInline(true);
439 lexem.addBody("return input.mid(lexemStart, lexemLength);");
440 klass.addMember(Class::PublicMember, lexem);
444 for (int i = 0; i < dfa.count(); ++i)
445 if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
446 Function handlerFunc("int", dfa.at(i).symbol);
447 klass.addMember(Class::PublicMember, handlerFunc);
450 Function lexFunc;
451 lexFunc.setReturnType("int");
452 lexFunc.setName("lex()");
454 CodeBlock body;
455 body << "lexemStart = pos;";
456 body << "lexemLength = 0;";
457 body << "int lastAcceptingPos = -1;";
458 body << "int token = -1;";
459 body << "QChar ch;";
460 body.addNewLine();
462 backReferenceMap.clear();
463 foreach (State s, dfa)
464 foreach (int state, s.transitions)
465 backReferenceMap[state]++;
467 bool haveSingleReferencedFinalState = false;
469 for (int i = 0; i < dfa.count(); ++i) {
470 if (isSingleReferencedFinalState(i)) {
471 haveSingleReferencedFinalState = true;
472 continue;
475 if (i > 0)
476 body << "state_" << i << ":";
477 else
478 body << "// initial state";
480 body.indent();
482 if (!dfa.at(i).symbol.isEmpty()) {
483 body << "lastAcceptingPos = pos;";
484 body << "token = " << dfa.at(i).symbol << ";";
487 body.outdent();
489 body.indent();
491 if (!dfa.at(i).transitions.isEmpty()) {
492 body << "ch = next();";
493 generateTransitions(body, dfa.at(i).transitions);
496 body << "goto out;";
498 body.outdent();
501 if (haveSingleReferencedFinalState) {
502 body << "found:";
503 body << "lastAcceptingPos = pos;";
504 body.addNewLine();
507 body << "out:";
508 body << "if (lastAcceptingPos != -1) {";
509 body.indent();
510 body << "lexemLength = lastAcceptingPos - lexemStart;";
511 body << "pos = lastAcceptingPos;";
512 body.outdent();
513 body << "}";
514 body << "return token;";
516 lexFunc.addBody(body);
518 klass.addMember(Class::PublicMember, lexFunc);
520 QString header;
521 QFile headerFile(headerFileName);
522 if (!headerFileName.isEmpty()
523 && headerFile.exists()
524 && headerFile.open(QIODevice::ReadOnly)) {
525 header = QString::fromUtf8(headerFile.readAll());
528 header += QLatin1String("// auto generated. DO NOT EDIT.\n");
530 return header + klass.declaration() + klass.definition();