From 9a7d0dc3677067787019ab4e5ea7cd79ce013679 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 23 Jun 2009 20:11:48 -0700 Subject: [PATCH] Add getEncoding() to RevCommit to discover the encoding If an application needs to parse the raw buffer by hand it might benefit from knowing the encoding of the commit. We can make it available to them through a getEncoding() method, using the same logic we already use for getFullMessage() and getShortMessage(), but this is still only an estimate based on the "encoding" header and may not be reality if the commit is horribly malformed. Signed-off-by: Shawn O. Pearce Signed-off-by: Robin Rosenberg --- .../tst/org/spearce/jgit/revwalk/RevCommitParseTest.java | 6 ++++++ .../src/org/spearce/jgit/revwalk/RevCommit.java | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java index 9b959243..62a4ab54 100644 --- a/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java +++ b/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java @@ -39,6 +39,7 @@ package org.spearce.jgit.revwalk; import java.io.ByteArrayOutputStream; +import org.spearce.jgit.lib.Constants; import org.spearce.jgit.lib.ObjectId; import org.spearce.jgit.lib.PersonIdent; import org.spearce.jgit.lib.RepositoryTestCase; @@ -145,6 +146,7 @@ public class RevCommitParseTest extends RepositoryTestCase { c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id c.parseCanonical(new RevWalk(db), b.toByteArray()); + assertSame(Constants.CHARSET, c.getEncoding()); assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName()); assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage()); assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage()); @@ -163,6 +165,7 @@ public class RevCommitParseTest extends RepositoryTestCase { c = 
new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id c.parseCanonical(new RevWalk(db), b.toByteArray()); + assertSame(Constants.CHARSET, c.getEncoding()); assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName()); assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage()); assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage()); @@ -187,6 +190,7 @@ public class RevCommitParseTest extends RepositoryTestCase { c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id c.parseCanonical(new RevWalk(db), b.toByteArray()); + assertEquals("EUC-JP", c.getEncoding().name()); assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName()); assertEquals("\u304d\u308c\u3044", c.getShortMessage()); assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage()); @@ -215,6 +219,7 @@ public class RevCommitParseTest extends RepositoryTestCase { c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id c.parseCanonical(new RevWalk(db), b.toByteArray()); + assertEquals("EUC-JP", c.getEncoding().name()); assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName()); assertEquals("\u304d\u308c\u3044", c.getShortMessage()); assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage()); @@ -244,6 +249,7 @@ public class RevCommitParseTest extends RepositoryTestCase { c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id c.parseCanonical(new RevWalk(db), b.toByteArray()); + assertEquals("ISO-8859-1", c.getEncoding().name()); assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName()); assertEquals("\u304d\u308c\u3044", c.getShortMessage()); assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage()); diff --git a/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java b/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java index f211dfd6..284a1838 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java +++ 
b/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java @@ -1,4 +1,5 @@ /* + * Copyright (C) 2009, Google Inc. * Copyright (C) 2008, Shawn O. Pearce * * All rights reserved. @@ -377,6 +378,21 @@ public class RevCommit extends RevObject { } /** + * Determine the encoding of the commit message buffer. + * <p>
+ * Locates the "encoding" header (if present) and then returns the proper + * character set to apply to this buffer to evaluate its contents as + * character data. + * <p>
+ * If no encoding header is present, {@link Constants#CHARSET} is assumed. + * + * @return the preferred encoding of {@link #getRawBuffer()}. + */ + public final Charset getEncoding() { + return RawParseUtils.parseEncoding(buffer); + } + + /** * Reset this commit to allow another RevWalk with the same instances. * <p>
* Subclasses must call super.reset() to ensure the -- 2.11.4.GIT