Intelligent parsing of ambiguously encoded meta data.
[egit.git] / org.spearce.jgit.test / tst / org / spearce / jgit / revwalk / RevCommitParseTest.java
blob805e29e925c35b0cb37558ec87f144ba0ab7e32e
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.revwalk;
40 import java.io.ByteArrayOutputStream;
42 import org.spearce.jgit.lib.ObjectId;
43 import org.spearce.jgit.lib.PersonIdent;
44 import org.spearce.jgit.lib.RepositoryTestCase;
46 public class RevCommitParseTest extends RepositoryTestCase {
47 public void testParse_NoParents() throws Exception {
48 final ObjectId treeId = id("9788669ad918b6fcce64af8882fc9a81cb6aba67");
49 final String authorName = "A U. Thor";
50 final String authorEmail = "a_u_thor@example.com";
51 final int authorTime = 1218123387;
53 final String committerName = "C O. Miter";
54 final String committerEmail = "comiter@example.com";
55 final int committerTime = 1218123390;
56 final StringBuilder body = new StringBuilder();
58 body.append("tree ");
59 body.append(treeId.name());
60 body.append("\n");
62 body.append("author ");
63 body.append(authorName);
64 body.append(" <");
65 body.append(authorEmail);
66 body.append("> ");
67 body.append(authorTime);
68 body.append(" +0700\n");
70 body.append("committer ");
71 body.append(committerName);
72 body.append(" <");
73 body.append(committerEmail);
74 body.append("> ");
75 body.append(committerTime);
76 body.append(" -0500\n");
78 body.append("\n");
80 final RevWalk rw = new RevWalk(db);
81 final RevCommit c;
83 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
84 assertNull(c.getTree());
85 assertNull(c.parents);
87 c.parseCanonical(rw, body.toString().getBytes("UTF-8"));
88 assertNotNull(c.getTree());
89 assertEquals(treeId, c.getTree().getId());
90 assertSame(rw.lookupTree(treeId), c.getTree());
92 assertNotNull(c.parents);
93 assertEquals(0, c.parents.length);
94 assertEquals("", c.getFullMessage());
96 final PersonIdent cAuthor = c.getAuthorIdent();
97 assertNotNull(cAuthor);
98 assertEquals(authorName, cAuthor.getName());
99 assertEquals(authorEmail, cAuthor.getEmailAddress());
101 final PersonIdent cCommitter = c.getCommitterIdent();
102 assertNotNull(cCommitter);
103 assertEquals(committerName, cCommitter.getName());
104 assertEquals(committerEmail, cCommitter.getEmailAddress());
107 private RevCommit create(final String msg) throws Exception {
108 final StringBuilder b = new StringBuilder();
109 b.append("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n");
110 b.append("author A U. Thor <a_u_thor@example.com> 1218123387 +0700\n");
111 b.append("committer C O. Miter <c@example.com> 1218123390 -0500\n");
112 b.append("\n");
113 b.append(msg);
115 final RevCommit c;
116 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
117 c.parseCanonical(new RevWalk(db), b.toString().getBytes("UTF-8"));
118 return c;
121 public void testParse_WeirdHeaderOnlyCommit() throws Exception {
122 final StringBuilder b = new StringBuilder();
123 b.append("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n");
124 b.append("author A U. Thor <a_u_thor@example.com> 1218123387 +0700\n");
125 b.append("committer C O. Miter <c@example.com> 1218123390 -0500\n");
127 final RevCommit c;
128 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
129 c.parseCanonical(new RevWalk(db), b.toString().getBytes("UTF-8"));
131 assertEquals("", c.getFullMessage());
132 assertEquals("", c.getShortMessage());
135 public void testParse_implicit_UTF8_encoded() throws Exception {
136 final ByteArrayOutputStream b = new ByteArrayOutputStream();
137 b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes("UTF-8"));
138 b.write("author F\u00f6r fattare <a_u_thor@example.com> 1218123387 +0700\n".getBytes("UTF-8"));
139 b.write("committer C O. Miter <c@example.com> 1218123390 -0500\n".getBytes("UTF-8"));
140 b.write("\n".getBytes("UTF-8"));
141 b.write("Sm\u00f6rg\u00e5sbord\n".getBytes("UTF-8"));
142 b.write("\n".getBytes("UTF-8"));
143 b.write("\u304d\u308c\u3044\n".getBytes("UTF-8"));
144 final RevCommit c;
145 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
146 c.parseCanonical(new RevWalk(db), b.toByteArray());
148 assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
149 assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage());
150 assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage());
153 public void testParse_implicit_mixed_encoded() throws Exception {
154 final ByteArrayOutputStream b = new ByteArrayOutputStream();
155 b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes("UTF-8"));
156 b.write("author F\u00f6r fattare <a_u_thor@example.com> 1218123387 +0700\n".getBytes("ISO-8859-1"));
157 b.write("committer C O. Miter <c@example.com> 1218123390 -0500\n".getBytes("UTF-8"));
158 b.write("\n".getBytes("UTF-8"));
159 b.write("Sm\u00f6rg\u00e5sbord\n".getBytes("UTF-8"));
160 b.write("\n".getBytes("UTF-8"));
161 b.write("\u304d\u308c\u3044\n".getBytes("UTF-8"));
162 final RevCommit c;
163 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
164 c.parseCanonical(new RevWalk(db), b.toByteArray());
166 assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
167 assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage());
168 assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage());
172 * Test parsing of a commit whose encoding is given and works.
174 * @throws Exception
176 public void testParse_explicit_encoded() throws Exception {
177 final ByteArrayOutputStream b = new ByteArrayOutputStream();
178 b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes("EUC-JP"));
179 b.write("author F\u00f6r fattare <a_u_thor@example.com> 1218123387 +0700\n".getBytes("EUC-JP"));
180 b.write("committer C O. Miter <c@example.com> 1218123390 -0500\n".getBytes("EUC-JP"));
181 b.write("encoding euc_JP\n".getBytes("EUC-JP"));
182 b.write("\n".getBytes("EUC-JP"));
183 b.write("\u304d\u308c\u3044\n".getBytes("EUC-JP"));
184 b.write("\n".getBytes("EUC-JP"));
185 b.write("Hi\n".getBytes("EUC-JP"));
186 final RevCommit c;
187 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
188 c.parseCanonical(new RevWalk(db), b.toByteArray());
190 assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
191 assertEquals("\u304d\u308c\u3044", c.getShortMessage());
192 assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
196 * This is a twisted case, but show what we expect here. We can revise the
197 * expectations provided this case is updated.
199 * What happens here is that an encoding us given, but data is not encoded
200 * that way (and we can detect it), so we try other encodings.
202 * @throws Exception
204 public void testParse_explicit_bad_encoded() throws Exception {
205 final ByteArrayOutputStream b = new ByteArrayOutputStream();
206 b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes("UTF-8"));
207 b.write("author F\u00f6r fattare <a_u_thor@example.com> 1218123387 +0700\n".getBytes("ISO-8859-1"));
208 b.write("committer C O. Miter <c@example.com> 1218123390 -0500\n".getBytes("UTF-8"));
209 b.write("encoding EUC-JP\n".getBytes("UTF-8"));
210 b.write("\n".getBytes("UTF-8"));
211 b.write("\u304d\u308c\u3044\n".getBytes("UTF-8"));
212 b.write("\n".getBytes("UTF-8"));
213 b.write("Hi\n".getBytes("UTF-8"));
214 final RevCommit c;
215 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
216 c.parseCanonical(new RevWalk(db), b.toByteArray());
218 assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
219 assertEquals("\u304d\u308c\u3044", c.getShortMessage());
220 assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
224 * This is a twisted case too, but show what we expect here. We can revise the
225 * expectations provided this case is updated.
227 * What happens here is that an encoding us given, but data is not encoded
228 * that way (and we can detect it), so we try other encodings. Here data could
229 * actually be decoded in the stated encoding, but we overide using UTF-8.
231 * @throws Exception
233 public void testParse_explicit_bad_encoded2() throws Exception {
234 final ByteArrayOutputStream b = new ByteArrayOutputStream();
235 b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes("UTF-8"));
236 b.write("author F\u00f6r fattare <a_u_thor@example.com> 1218123387 +0700\n".getBytes("UTF-8"));
237 b.write("committer C O. Miter <c@example.com> 1218123390 -0500\n".getBytes("UTF-8"));
238 b.write("encoding ISO-8859-1\n".getBytes("UTF-8"));
239 b.write("\n".getBytes("UTF-8"));
240 b.write("\u304d\u308c\u3044\n".getBytes("UTF-8"));
241 b.write("\n".getBytes("UTF-8"));
242 b.write("Hi\n".getBytes("UTF-8"));
243 final RevCommit c;
244 c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
245 c.parseCanonical(new RevWalk(db), b.toByteArray());
247 assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
248 assertEquals("\u304d\u308c\u3044", c.getShortMessage());
249 assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
252 public void testParse_NoMessage() throws Exception {
253 final String msg = "";
254 final RevCommit c = create(msg);
255 assertEquals(msg, c.getFullMessage());
256 assertEquals(msg, c.getShortMessage());
259 public void testParse_OnlyLFMessage() throws Exception {
260 final RevCommit c = create("\n");
261 assertEquals("\n", c.getFullMessage());
262 assertEquals("", c.getShortMessage());
265 public void testParse_ShortLineOnlyNoLF() throws Exception {
266 final String shortMsg = "This is a short message.";
267 final RevCommit c = create(shortMsg);
268 assertEquals(shortMsg, c.getFullMessage());
269 assertEquals(shortMsg, c.getShortMessage());
272 public void testParse_ShortLineOnlyEndLF() throws Exception {
273 final String shortMsg = "This is a short message.";
274 final String fullMsg = shortMsg + "\n";
275 final RevCommit c = create(fullMsg);
276 assertEquals(fullMsg, c.getFullMessage());
277 assertEquals(shortMsg, c.getShortMessage());
280 public void testParse_ShortLineOnlyEmbeddedLF() throws Exception {
281 final String fullMsg = "This is a\nshort message.";
282 final String shortMsg = fullMsg.replace('\n', ' ');
283 final RevCommit c = create(fullMsg);
284 assertEquals(fullMsg, c.getFullMessage());
285 assertEquals(shortMsg, c.getShortMessage());
288 public void testParse_ShortLineOnlyEmbeddedAndEndingLF() throws Exception {
289 final String fullMsg = "This is a\nshort message.\n";
290 final String shortMsg = "This is a short message.";
291 final RevCommit c = create(fullMsg);
292 assertEquals(fullMsg, c.getFullMessage());
293 assertEquals(shortMsg, c.getShortMessage());
296 public void testParse_GitStyleMessage() throws Exception {
297 final String shortMsg = "This fixes a bug.";
298 final String body = "We do it with magic and pixie dust and stuff.\n"
299 + "\n" + "Signed-off-by: A U. Thor <author@example.com>\n";
300 final String fullMsg = shortMsg + "\n" + "\n" + body;
301 final RevCommit c = create(fullMsg);
302 assertEquals(fullMsg, c.getFullMessage());
303 assertEquals(shortMsg, c.getShortMessage());
306 private static ObjectId id(final String str) {
307 return ObjectId.fromString(str);