Refactor the diff-parser to be a state machine to allow me to fix
authorThomas Zander <thomas.zander@trolltech.com>
Wed, 22 Jul 2009 18:18:44 +0000 (22 21:18 +0300)
committerThomas Zander <thomas.zander@trolltech.com>
Wed, 22 Jul 2009 18:18:44 +0000 (22 21:18 +0300)
fringe cases
Unit test passes again..

src/hunks/ChangeSet.cpp

index de16127..de5c824 100644 (file)
@@ -351,132 +351,174 @@ bool ChangeSet::hasAllHunks() const
 // static
 QList<File> ChangeSet::readGitDiff(QIODevice &git, File *fileToDiff)
 {
-    QList<File> filesInDiff;
-    QList<File> newFiles;
-    QList<File> removedFiles;
-
-    // parse the output and create objects for each hunk. (note how that can be made multi-threading)
-    //  we have to have the filename in the hunk too to allow skipping a whole hunk
-    char buf[10240];
-    enum State {
-        InPatch,
-        Empty,
-        InHeader
-    };
-    // a diff can be multiple files, or just the one fileToDiff.  Lets keep one File object to point to the current file.
-    State state = Empty;
-    File file;
-    Hunk hunk;
-    while(true) {
-        qint64 lineLength = Vng::readLine(&git, buf, sizeof(buf));
-        if (lineLength == -1 || state == Empty) {
-            file.addHunk(hunk);
-            hunk = Hunk();
-            if (file.isValid()) {
-                if (file.oldFileName().isEmpty())
-                    newFiles << file;
-                else if (file.fileName().isEmpty())
-                    removedFiles << file;
+    class States {
+      public:
+        States(QIODevice &device, File *fileToDiff)
+            : m_input(device),
+            m_fileToDiff(fileToDiff),
+            m_state(Empty)
+        {
+        }
+
+        void start() {
+            Q_ASSERT(m_filesInDiff.isEmpty()); // only call start once, please ;)
+            m_state = Empty;
+            while(true) {
+                qint64 lineLength = Vng::readLine(&m_input, buf, sizeof(buf));
+                if (lineLength == -1 || m_state == Empty)
+                    storeFile();
+
+                if (lineLength == -1)
+                    break;
+                if (addLine(QString::fromLocal8Bit(buf, lineLength)))
+                    break;
+                if (m_state == InPatch) {
+                    QByteArray array(buf, lineLength);
+                    m_hunk.addLine(array);
+                }
+            }
+            m_input.close();
+
+            // try to find out if there are renames
+            foreach (File addedFile, m_newFiles) {
+                foreach (File removedFile, m_removedFiles) {
+                    if (!addedFile.sha1().isEmpty() && removedFile.oldSha1() == addedFile.sha1()) {
+                        // TODO if this is a partial sha1 we may want to check some of the content
+                        m_removedFiles.removeAll(removedFile);
+                        addedFile.setOldSha1(removedFile.oldSha1());
+                        addedFile.setOldFileName(removedFile.oldFileName());
+                        addedFile.setOldProtection(removedFile.oldProtection());
+                        break;
+                    }
+                }
+            }
+            m_filesInDiff << m_newFiles;
+            m_filesInDiff << m_removedFiles;
+            m_newFiles.clear();
+            m_removedFiles.clear();
+        }
+
+        QList<File> results() const {
+            return m_filesInDiff;
+        }
+      private:
+        enum State {
+            InPatch,
+            Empty,
+            InHeader
+        };
+
+        void storeFile() {
+            m_file.addHunk(m_hunk);
+            m_hunk = Hunk();
+            if (m_file.isValid()) {
+                if (m_file.oldFileName().isEmpty())
+                    m_newFiles << m_file;
+                else if (m_file.fileName().isEmpty())
+                    m_removedFiles << m_file;
                 else
-                    filesInDiff << file;
+                    m_filesInDiff << m_file;
             }
-            if (fileToDiff)
-                file = File(*fileToDiff);
+            if (m_fileToDiff)
+                m_file = File(*m_fileToDiff);
             else
-                file = File();
-            state = Empty;
+                m_file = File();
+            m_state = Empty;
         }
 
-        if (lineLength == -1)
-            break;
-        QString line = QString::fromLocal8Bit(buf, lineLength);
-
-        const bool newfile = line.startsWith("--- /dev/null");
-        if (line.length() > 6 && (newfile || line.startsWith("--- a/")
-            || line.startsWith("--- \"a/"))) {
-            if (!newfile && fileToDiff == 0) {
+        // returns true if we should exit.
+        bool addLine(const QString &line) {
+            const bool newfile = line.startsWith("--- /dev/null");
+            if (line.length() > 6 && (newfile || line.startsWith("--- a/")
+                || line.startsWith("--- \"a/"))) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
+                if (!newfile && m_fileToDiff == 0) {
+                    if (line[4].unicode() == '"') { // git-encoding...
+                        QByteArray array(buf + 7, strlen(buf) - 8);
+                        array.prepend('"');
+                        m_file.setOldFileName(File::escapeGitFilename(array));
+                    } else {
+                        QByteArray array(buf + 6, strlen(buf) - 7);
+                        m_file.setOldFileName(File::escapeGitFilename(array));
+                    }
+                }
+            }
+            else if (m_fileToDiff == 0 && line.length() > 6 &&
+                    (line.startsWith("+++ b/") || line.startsWith("+++ \"b"))) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
                 if (line[4].unicode() == '"') { // git-encoding...
                     QByteArray array(buf + 7, strlen(buf) - 8);
                     array.prepend('"');
-                    file.setOldFileName(File::escapeGitFilename(array));
+                    m_file.setFileName(File::escapeGitFilename(array));
                 } else {
-                    QByteArray array(buf + 6, strlen(buf) - 7);
-                    file.setOldFileName(File::escapeGitFilename(array));
+                    m_file.setFileName(QByteArray(buf + 6, strlen(buf) - 7));
                 }
             }
-            state = state == InPatch ? Empty : InHeader;
-        }
-        else if (fileToDiff == 0 && line.length() > 6 &&
-                (line.startsWith("+++ b/") || line.startsWith("+++ \"b"))) {
-            state = InHeader;
-            if (line[4].unicode() == '"') { // git-encoding...
-                QByteArray array(buf + 7, strlen(buf) - 8);
-                array.prepend('"');
-                file.setFileName(File::escapeGitFilename(array));
-            } else {
-                file.setFileName(QByteArray(buf + 6, strlen(buf) - 7));
+            else if (line.length() > 5 && line.startsWith("@@ -")) {
+                m_file.addHunk(m_hunk);
+                m_hunk = Hunk();
+                m_state = InPatch;
             }
-        }
-        else if (line.length() > 5 && line.startsWith("@@ -")) {
-            file.addHunk(hunk);
-            hunk = Hunk();
-            state = InPatch;
-        }
-        else if (line.startsWith("diff --git ")) {
-            state = state == InPatch ? Empty : InHeader;
-            continue;
-        }
-        else if (line.startsWith("Binary files a/") && line.indexOf(" differ") > 0) {
-            Q_ASSERT(fileToDiff);
-            fileToDiff->setBinary(true);
-            git.close();
-            return filesInDiff;
-        }
-        else if (line.startsWith("index ")) {
-            int dot = line.indexOf(QLatin1Char('.'), 6);
-            if (dot > 0 && line.length() > dot+3) {
-                file.setOldSha1(line.mid(6, dot-6));
-                int space = line.indexOf(QLatin1Char(' '), dot);
-                if (space == -1) {
-                    space = line.length()-1; // cut off the linefeed
-                } else {
-                    file.setProtection(line.mid(space+1).trimmed());
+            else if (line.startsWith("diff --git ")) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
+            }
+            else if (line.startsWith("Binary files a/") && line.indexOf(" differ") > 0) {
+                Q_ASSERT(m_fileToDiff);
+                m_fileToDiff->setBinary(true);
+                return true;
+            }
+            else if (line.startsWith("index ")) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
+                int dot = line.indexOf(QLatin1Char('.'), 6);
+                if (dot > 0 && line.length() > dot+3) {
+                    m_file.setOldSha1(line.mid(6, dot-6));
+                    int space = line.indexOf(QLatin1Char(' '), dot);
+                    if (space == -1) {
+                        space = line.length()-1; // cut off the linefeed
+                    } else {
+                        m_file.setProtection(line.mid(space+1).trimmed());
+                    }
+                    m_file.setSha1(line.mid(dot+2, space - dot - 2));
                 }
-                file.setSha1(line.mid(dot+2, space - dot - 2));
             }
-            state = state == InPatch ? Empty : InHeader;
-        }
-        else if (line.startsWith("deleted file mode ")) {
-            file.setProtection(line.mid(18).trimmed());
-            state = state == InPatch ? Empty : InHeader;
-        }
-        else if (line.startsWith("new file mode ")) {
-            file.setProtection(line.mid(13).trimmed());
-            state = state == InPatch ? Empty : InHeader;
-        }
-        if (state == InPatch) {
-            QByteArray array(buf, lineLength);
-            hunk.addLine(array);
-        }
-    }
-    git.close();
-
-    // try to find out if there are renames
-    foreach (File removedFile, removedFiles) {
-        foreach (File addedFile, newFiles) {
-            if (!addedFile.sha1().isEmpty() && removedFile.oldSha1() == addedFile.sha1()) {
-                // TODO if this is a partial sha1 we may want to check some of the content
-                newFiles.removeAll(addedFile);
-                addedFile.setOldSha1(removedFile.oldSha1());
-                addedFile.setOldFileName(removedFile.oldFileName());
-                addedFile.setOldProtection(removedFile.oldProtection());
-                filesInDiff << addedFile;
-                break;
+            else if (line.startsWith("deleted file mode ")) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
+                m_file.setProtection(line.mid(18).trimmed());
             }
+            else if (line.startsWith("new file mode ")) {
+                if (m_state == InPatch)
+                    storeFile();
+                m_state = InHeader;
+                m_file.setProtection(line.mid(13).trimmed());
+            }
+            return false;
         }
-    }
 
-    return filesInDiff;
+        QList<File> m_filesInDiff;
+        QList<File> m_newFiles;
+        QList<File> m_removedFiles;
+
+        QIODevice &m_input;
+        File *m_fileToDiff;
+        State m_state;
+        File m_file;
+        Hunk m_hunk;
+        char buf[10240];
+    };
+
+    States stateMachine(git, fileToDiff);
+    stateMachine.start();
+    return stateMachine.results();
 }
 
 void ChangeSet::addFile(const File &file)