From: Thomas Zander Date: Wed, 22 Jul 2009 18:18:44 +0000 (+0300) Subject: Refactor the diff-parser to be a state machine to allow me to fix X-Git-Url: https://repo.or.cz/w/vng.git/commitdiff_plain/183b784bb3f1b0bb45bed3027964926224af63f3 Refactor the diff-parser to be a state machine to allow me to fix fringe cases Unit test passes again.. --- diff --git a/src/hunks/ChangeSet.cpp b/src/hunks/ChangeSet.cpp index de16127..de5c824 100644 --- a/src/hunks/ChangeSet.cpp +++ b/src/hunks/ChangeSet.cpp @@ -351,132 +351,174 @@ bool ChangeSet::hasAllHunks() const // static QList ChangeSet::readGitDiff(QIODevice &git, File *fileToDiff) { - QList filesInDiff; - QList newFiles; - QList removedFiles; - - // parse the output and create objects for each hunk. (note how that can be made multi-threading) - // we have to have the filename in the hunk too to allow skipping a whole hunk - char buf[10240]; - enum State { - InPatch, - Empty, - InHeader - }; - // a diff can be multiple files, or just the one fileToDiff. Lets keep one File object to point to the current file. - State state = Empty; - File file; - Hunk hunk; - while(true) { - qint64 lineLength = Vng::readLine(&git, buf, sizeof(buf)); - if (lineLength == -1 || state == Empty) { - file.addHunk(hunk); - hunk = Hunk(); - if (file.isValid()) { - if (file.oldFileName().isEmpty()) - newFiles << file; - else if (file.fileName().isEmpty()) - removedFiles << file; + class States { + public: + States(QIODevice &device, File *fileToDiff) + : m_input(device), + m_fileToDiff(fileToDiff), + m_state(Empty) + { + } + + void start() { + Q_ASSERT(m_filesInDiff.isEmpty()); // only call start once, please ;) + m_state = Empty; + while(true) { + qint64 lineLength = Vng::readLine(&m_input, buf, sizeof(buf)); + if (lineLength == -1 || m_state == Empty) + storeFile(); + + if (lineLength == -1) + break; + if (addLine(QString::fromLocal8Bit(buf, lineLength))) + break; + if (m_state == InPatch) { + QByteArray array(buf, lineLength); + m_hunk.addLine(array); + } + } + m_input.close(); + + // try to find out if there are renames + foreach (File addedFile, m_newFiles) { + foreach (File removedFile, m_removedFiles) { + if (!addedFile.sha1().isEmpty() && removedFile.oldSha1() == addedFile.sha1()) { + // TODO if this is a partial sha1 we may want to check some of the content + m_removedFiles.removeAll(removedFile); + addedFile.setOldSha1(removedFile.oldSha1()); + addedFile.setOldFileName(removedFile.oldFileName()); + addedFile.setOldProtection(removedFile.oldProtection()); + break; + } + } + } + m_filesInDiff << m_newFiles; + m_filesInDiff << m_removedFiles; + m_newFiles.clear(); + m_removedFiles.clear(); + } + + QList results() const { + return m_filesInDiff; + } + private: + enum State { + InPatch, + Empty, + InHeader + }; + + void storeFile() { + m_file.addHunk(m_hunk); + m_hunk = Hunk(); + if (m_file.isValid()) { + if (m_file.oldFileName().isEmpty()) + m_newFiles << m_file; + else if (m_file.fileName().isEmpty()) + m_removedFiles << m_file; else - filesInDiff << file; + m_filesInDiff << m_file; } - if (fileToDiff) - file = File(*fileToDiff); + if (m_fileToDiff) + m_file = File(*m_fileToDiff); else - file = File(); - state = Empty; + m_file = File(); + m_state = Empty; } - if (lineLength == -1) - break; - QString line = QString::fromLocal8Bit(buf, lineLength); - - const bool newfile = line.startsWith("--- /dev/null"); - if (line.length() > 6 && (newfile || line.startsWith("--- a/") - || line.startsWith("--- \"a/"))) { - if (!newfile && fileToDiff == 0) { + // returns true if we should exit. + bool addLine(const QString &line) { + const bool newfile = line.startsWith("--- /dev/null"); + if (line.length() > 6 && (newfile || line.startsWith("--- a/") + || line.startsWith("--- \"a/"))) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; + if (!newfile && m_fileToDiff == 0) { + if (line[4].unicode() == '"') { // git-encoding... + QByteArray array(buf + 7, strlen(buf) - 8); + array.prepend('"'); + m_file.setOldFileName(File::escapeGitFilename(array)); + } else { + QByteArray array(buf + 6, strlen(buf) - 7); + m_file.setOldFileName(File::escapeGitFilename(array)); + } + } + } + else if (m_fileToDiff == 0 && line.length() > 6 && + (line.startsWith("+++ b/") || line.startsWith("+++ \"b"))) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; if (line[4].unicode() == '"') { // git-encoding... QByteArray array(buf + 7, strlen(buf) - 8); array.prepend('"'); - file.setOldFileName(File::escapeGitFilename(array)); + m_file.setFileName(File::escapeGitFilename(array)); } else { - QByteArray array(buf + 6, strlen(buf) - 7); - file.setOldFileName(File::escapeGitFilename(array)); + m_file.setFileName(QByteArray(buf + 6, strlen(buf) - 7)); } } - state = state == InPatch ? Empty : InHeader; - } - else if (fileToDiff == 0 && line.length() > 6 && - (line.startsWith("+++ b/") || line.startsWith("+++ \"b"))) { - state = InHeader; - if (line[4].unicode() == '"') { // git-encoding... - QByteArray array(buf + 7, strlen(buf) - 8); - array.prepend('"'); - file.setFileName(File::escapeGitFilename(array)); - } else { - file.setFileName(QByteArray(buf + 6, strlen(buf) - 7)); + else if (line.length() > 5 && line.startsWith("@@ -")) { + m_file.addHunk(m_hunk); + m_hunk = Hunk(); + m_state = InPatch; } - } - else if (line.length() > 5 && line.startsWith("@@ -")) { - file.addHunk(hunk); - hunk = Hunk(); - state = InPatch; - } - else if (line.startsWith("diff --git ")) { - state = state == InPatch ? Empty : InHeader; - continue; - } - else if (line.startsWith("Binary files a/") && line.indexOf(" differ") > 0) { - Q_ASSERT(fileToDiff); - fileToDiff->setBinary(true); - git.close(); - return filesInDiff; - } - else if (line.startsWith("index ")) { - int dot = line.indexOf(QLatin1Char('.'), 6); - if (dot > 0 && line.length() > dot+3) { - file.setOldSha1(line.mid(6, dot-6)); - int space = line.indexOf(QLatin1Char(' '), dot); - if (space == -1) { - space = line.length()-1; // cut off the linefeed - } else { - file.setProtection(line.mid(space+1).trimmed()); + else if (line.startsWith("diff --git ")) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; + } + else if (line.startsWith("Binary files a/") && line.indexOf(" differ") > 0) { + Q_ASSERT(m_fileToDiff); + m_fileToDiff->setBinary(true); + return true; + } + else if (line.startsWith("index ")) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; + int dot = line.indexOf(QLatin1Char('.'), 6); + if (dot > 0 && line.length() > dot+3) { + m_file.setOldSha1(line.mid(6, dot-6)); + int space = line.indexOf(QLatin1Char(' '), dot); + if (space == -1) { + space = line.length()-1; // cut off the linefeed + } else { + m_file.setProtection(line.mid(space+1).trimmed()); + } + m_file.setSha1(line.mid(dot+2, space - dot - 2)); } - file.setSha1(line.mid(dot+2, space - dot - 2)); } - state = state == InPatch ? Empty : InHeader; - } - else if (line.startsWith("deleted file mode ")) { - file.setProtection(line.mid(18).trimmed()); - state = state == InPatch ? Empty : InHeader; - } - else if (line.startsWith("new file mode ")) { - file.setProtection(line.mid(13).trimmed()); - state = state == InPatch ? Empty : InHeader; - } - if (state == InPatch) { - QByteArray array(buf, lineLength); - hunk.addLine(array); - } - } - git.close(); - - // try to find out if there are renames - foreach (File removedFile, removedFiles) { - foreach (File addedFile, newFiles) { - if (!addedFile.sha1().isEmpty() && removedFile.oldSha1() == addedFile.sha1()) { - // TODO if this is a partial sha1 we may want to check some of the content - newFiles.removeAll(addedFile); - addedFile.setOldSha1(removedFile.oldSha1()); - addedFile.setOldFileName(removedFile.oldFileName()); - addedFile.setOldProtection(removedFile.oldProtection()); - filesInDiff << addedFile; - break; + else if (line.startsWith("deleted file mode ")) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; + m_file.setProtection(line.mid(18).trimmed()); } + else if (line.startsWith("new file mode ")) { + if (m_state == InPatch) + storeFile(); + m_state = InHeader; + m_file.setProtection(line.mid(13).trimmed()); + } + return false; } - } - return filesInDiff; + QList m_filesInDiff; + QList m_newFiles; + QList m_removedFiles; + + QIODevice &m_input; + File *m_fileToDiff; + State m_state; + File m_file; + Hunk m_hunk; + char buf[10240]; + }; + + States stateMachine(git, fileToDiff); + stateMachine.start(); + return stateMachine.results(); } void ChangeSet::addFile(const File &file)