Set up for better tree diff optimizations
[git/mingw/j6t.git] / convert-objects.c
blob4809f9199fa21dcd95ab508e26196080d49e8e88
1 #include "cache.h"
2 #include "blob.h"
3 #include "commit.h"
4 #include "tree.h"
/*
 * One record per object encountered during conversion: it pairs the
 * object's original SHA-1 with the SHA-1 of its rewritten form.
 */
struct entry {
	unsigned char old_sha1[20];	/* name the object had before conversion */
	unsigned char new_sha1[20];	/* name of the rewritten object */
	int converted;			/* set to 1 once new_sha1 has been filled in */
};
/* Capacity of the convert[] table below. */
#define MAXOBJECTS (1000000)

/*
 * Table of every object seen so far, kept ordered by old_sha1 so it can be
 * binary-searched; the first nr_convert slots are in use.
 */
static struct entry *convert[MAXOBJECTS];
static int nr_convert;

/* Declared early because object conversion is mutually recursive. */
static struct entry *convert_entry(unsigned char *sha1);
19 static struct entry *insert_new(unsigned char *sha1, int pos)
21 struct entry *new = xcalloc(1, sizeof(struct entry));
22 hashcpy(new->old_sha1, sha1);
23 memmove(convert + pos + 1, convert + pos, (nr_convert - pos) * sizeof(struct entry *));
24 convert[pos] = new;
25 nr_convert++;
26 if (nr_convert == MAXOBJECTS)
27 die("you're kidding me - hit maximum object limit");
28 return new;
31 static struct entry *lookup_entry(unsigned char *sha1)
33 int low = 0, high = nr_convert;
35 while (low < high) {
36 int next = (low + high) / 2;
37 struct entry *n = convert[next];
38 int cmp = hashcmp(sha1, n->old_sha1);
39 if (!cmp)
40 return n;
41 if (cmp < 0) {
42 high = next;
43 continue;
45 low = next+1;
47 return insert_new(sha1, low);
50 static void convert_binary_sha1(void *buffer)
52 struct entry *entry = convert_entry(buffer);
53 hashcpy(buffer, entry->new_sha1);
56 static void convert_ascii_sha1(void *buffer)
58 unsigned char sha1[20];
59 struct entry *entry;
61 if (get_sha1_hex(buffer, sha1))
62 die("expected sha1, got '%s'", (char*) buffer);
63 entry = convert_entry(sha1);
64 memcpy(buffer, sha1_to_hex(entry->new_sha1), 40);
/*
 * Normalise a tree-entry mode: keep only the file-type bits, and for
 * regular files add 0755 when the owner-execute bit (0100) is set or 0644
 * otherwise.  Non-regular types end up with no permission bits at all.
 */
static unsigned int convert_mode(unsigned int mode)
{
	unsigned int type_bits = mode & S_IFMT;

	if (!S_ISREG(mode))
		return type_bits;
	if (mode & 0100)
		return type_bits | 0755;
	return type_bits | 0644;
}
77 static int write_subdirectory(void *buffer, unsigned long size, const char *base, int baselen, unsigned char *result_sha1)
79 char *new = xmalloc(size);
80 unsigned long newlen = 0;
81 unsigned long used;
83 used = 0;
84 while (size) {
85 int len = 21 + strlen(buffer);
86 char *path = strchr(buffer, ' ');
87 unsigned char *sha1;
88 unsigned int mode;
89 char *slash, *origpath;
91 if (!path || sscanf(buffer, "%o", &mode) != 1)
92 die("bad tree conversion");
93 mode = convert_mode(mode);
94 path++;
95 if (memcmp(path, base, baselen))
96 break;
97 origpath = path;
98 path += baselen;
99 slash = strchr(path, '/');
100 if (!slash) {
101 newlen += sprintf(new + newlen, "%o %s", mode, path);
102 new[newlen++] = '\0';
103 hashcpy((unsigned char*)new + newlen, (unsigned char *) buffer + len - 20);
104 newlen += 20;
106 used += len;
107 size -= len;
108 buffer = (char *) buffer + len;
109 continue;
112 newlen += sprintf(new + newlen, "%o %.*s", S_IFDIR, (int)(slash - path), path);
113 new[newlen++] = 0;
114 sha1 = (unsigned char *)(new + newlen);
115 newlen += 20;
117 len = write_subdirectory(buffer, size, origpath, slash-origpath+1, sha1);
119 used += len;
120 size -= len;
121 buffer = (char *) buffer + len;
124 write_sha1_file(new, newlen, tree_type, result_sha1);
125 free(new);
126 return used;
/*
 * Convert a tree object held in buffer/size.
 *
 * Every entry is "<text>\0<20-byte sha1>".  The SHA-1 of each entry is
 * rewritten in place (converting the object it refers to first), and the
 * resulting buffer is then handed to write_subdirectory() with an empty
 * prefix, which stores the name of the written tree in result_sha1.
 *
 * Dies with "corrupt tree object" when an entry does not fit in the
 * remaining bytes.  The entry is validated *before* its SHA-1 is touched,
 * so a truncated tree is never read or converted past its end.
 */
static void convert_tree(void *buffer, unsigned long size, unsigned char *result_sha1)
{
	char *cursor = buffer;
	unsigned long remaining = size;

	while (remaining) {
		/* Bounded search: never scan beyond the object for the NUL. */
		const char *nul = memchr(cursor, '\0', remaining);
		size_t len;

		if (!nul)
			die("corrupt tree object");

		/* Entry text, its terminating NUL, and the binary SHA-1. */
		len = (size_t)(nul - cursor) + 1 + 20;
		if (len > remaining)
			die("corrupt tree object");

		convert_binary_sha1(cursor + len - 20);

		remaining -= len;
		cursor += len;
	}

	write_subdirectory(buffer, size, "", 0, result_sha1);
}
/*
 * Parse an old-style textual date and return it as seconds since the
 * epoch, as computed by mktime().
 *
 * Leading whitespace is skipped and only the text up to the first newline
 * (or end of string) is considered.  The text is copied into a fixed local
 * buffer; anything beyond what fits is dropped instead of overrunning it.
 *
 * The line is consumed piecewise by trying each strptime() format in turn.
 * When a format does not match, a known timezone abbreviation at the
 * current position is skipped instead.  If text is still left over once
 * the formats are exhausted, it is reported on stdout and the fields
 * gathered so far are used.
 */
static unsigned long parse_oldstyle_date(const char *buf)
{
	char buffer[100];
	size_t used = 0;
	struct tm tm;
	const char *formats[] = {
		"%c",
		"%a %b %d %T",
		"%Z",
		"%Y",
		" %Y",
		NULL
	};
	/* Timezone abbreviations that occur in the bad old format. */
	const char *timezones[] = {
		"PDT", "PST", "CEST", NULL
	};
	const char **fmt = formats;

	/* <ctype.h> functions require an unsigned char value. */
	while (isspace((unsigned char)*buf))
		buf++;

	/* Copy one line, stopping at NUL as well and never overflowing. */
	while (*buf && *buf != '\n' && used < sizeof(buffer) - 1)
		buffer[used++] = *buf++;
	buffer[used] = '\0';

	buf = buffer;
	memset(&tm, 0, sizeof(tm));
	do {
		const char *next = strptime(buf, *fmt, &tm);
		if (next) {
			/* Everything consumed: the date is complete. */
			if (!*next)
				return mktime(&tm);
			buf = next;
		} else {
			const char **tz;

			while (isspace((unsigned char)*buf))
				buf++;
			for (tz = timezones; *tz; tz++) {
				size_t tzlen = strlen(*tz);

				/* strncmp stops at the NUL of a short remainder. */
				if (!strncmp(buf, *tz, tzlen)) {
					buf += tzlen;
					break;
				}
			}
		}
		fmt++;
	} while (*buf && *fmt);
	printf("left: %s\n", buf);
	return mktime(&tm);
}
/*
 * Copy one "author"/"committer" line from *buf to dst, rewriting its date
 * into "<seconds> -0700" form unless it already starts with a digit.
 *
 * dst - destination; receives the line up to and including the "> "
 *       after the e-mail address, followed by the date and a newline.
 * buf - in: start of the line; out: start of the following line.
 * sp  - in: bytes remaining at *buf; out: bytes remaining after the line.
 *
 * Returns the number of bytes written to dst.  Dies when the line has no
 * terminating newline or no '>' before that newline.
 */
static int convert_date_line(char *dst, void **buf, unsigned long *sp)
{
	unsigned long size = *sp;
	char *line = *buf;
	char *eol = strchr(line, '\n');
	char *gt = strchr(line, '>');
	char *next, *date;
	int len;

	/*
	 * The '>' must belong to this line.  One found beyond the newline
	 * would make the prefix length below overshoot the line.
	 */
	if (!eol || !gt || gt > eol)
		die("missing or bad author/committer line %s", line);
	next = eol + 1;
	date = gt + 2;	/* skip "> " */

	*buf = next;
	*sp = size - (next - line);

	len = date - line;
	memcpy(dst, line, len);
	dst += len;

	/* Is it already in new format? */
	if (isdigit((unsigned char)*date)) {
		int datelen = next - date;
		memcpy(dst, date, datelen);
		return len + datelen;
	}

	/*
	 * Hacky hacky: one of the sparse old-style commits does not have
	 * any date at all, but we can fake it by using the committer date.
	 */
	if (*date == '\n' && strchr(next, '>'))
		date = strchr(next, '>')+2;

	return len + sprintf(dst, "%lu -0700\n", parse_oldstyle_date(date));
}
236 static void convert_date(void *buffer, unsigned long size, unsigned char *result_sha1)
238 char *new = xmalloc(size + 100);
239 unsigned long newlen = 0;
241 /* "tree <sha1>\n" */
242 memcpy(new + newlen, buffer, 46);
243 newlen += 46;
244 buffer = (char *) buffer + 46;
245 size -= 46;
247 /* "parent <sha1>\n" */
248 while (!memcmp(buffer, "parent ", 7)) {
249 memcpy(new + newlen, buffer, 48);
250 newlen += 48;
251 buffer = (char *) buffer + 48;
252 size -= 48;
255 /* "author xyz <xyz> date" */
256 newlen += convert_date_line(new + newlen, &buffer, &size);
257 /* "committer xyz <xyz> date" */
258 newlen += convert_date_line(new + newlen, &buffer, &size);
260 /* Rest */
261 memcpy(new + newlen, buffer, size);
262 newlen += size;
264 write_sha1_file(new, newlen, commit_type, result_sha1);
265 free(new);
/*
 * Convert a commit object: rewrite the hex SHA-1 on the tree line and on
 * every parent line in place (converting the referenced objects), then
 * let convert_date() fix up the date lines and write the result.
 *
 * Dies when the buffer does not begin with "tree ".
 */
static void convert_commit(void *buffer, unsigned long size, unsigned char *result_sha1)
{
	char *cursor = buffer;

	if (memcmp(cursor, "tree ", 5) != 0)
		die("Bad commit '%s'", (char*) buffer);
	convert_ascii_sha1(cursor + 5);

	/* "tree " + "hex sha1" + "\n" is 46 bytes; a parent line is 48. */
	for (cursor += 46; !memcmp(cursor, "parent ", 7); cursor += 48)
		convert_ascii_sha1(cursor + 7);

	convert_date(buffer, size, result_sha1);
}
284 static struct entry * convert_entry(unsigned char *sha1)
286 struct entry *entry = lookup_entry(sha1);
287 enum object_type type;
288 void *buffer, *data;
289 unsigned long size;
291 if (entry->converted)
292 return entry;
293 data = read_sha1_file(sha1, &type, &size);
294 if (!data)
295 die("unable to read object %s", sha1_to_hex(sha1));
297 buffer = xmalloc(size);
298 memcpy(buffer, data, size);
300 if (type == OBJ_BLOB) {
301 write_sha1_file(buffer, size, blob_type, entry->new_sha1);
302 } else if (type == OBJ_TREE)
303 convert_tree(buffer, size, entry->new_sha1);
304 else if (type == OBJ_COMMIT)
305 convert_commit(buffer, size, entry->new_sha1);
306 else
307 die("unknown object type %d in %s", type, sha1_to_hex(sha1));
308 entry->converted = 1;
309 free(buffer);
310 free(data);
311 return entry;
314 int main(int argc, char **argv)
316 unsigned char sha1[20];
317 struct entry *entry;
319 setup_git_directory();
321 if (argc != 2)
322 usage("git-convert-objects <sha1>");
323 if (get_sha1(argv[1], sha1))
324 die("Not a valid object name %s", argv[1]);
326 entry = convert_entry(sha1);
327 printf("new sha1: %s\n", sha1_to_hex(entry->new_sha1));
328 return 0;