Avoid wasting memory while keeping track of what we have during fetch.
[git/jnareb-git.git] / convert-objects.c
blob073cab592fba81c97e6f13d660b0329a42427ef0
1 #define _XOPEN_SOURCE /* glibc2 needs this */
2 #define __EXTENSIONS__ /* solaris needs this */
3 #include <time.h>
4 #include <ctype.h>
5 #include "cache.h"
/*
 * One object known to the converter: pairs the SHA-1 an object had
 * before conversion with the SHA-1 of its rewritten form.
 */
struct entry {
	unsigned char old_sha1[20];	/* object name before conversion (lookup key) */
	unsigned char new_sha1[20];	/* object name after conversion; filled in by convert_entry() */
	int converted;			/* set to 1 once new_sha1 has been computed */
};
/* Capacity of the convert[] table; insert_new() dies when it fills up. */
#define MAXOBJECTS (1000000)

/*
 * Table of every object seen so far, kept sorted by old_sha1 so that
 * lookup_entry() can binary-search it.  Only pointers are stored here;
 * each entry is allocated separately when it is first inserted.
 */
static struct entry *convert[MAXOBJECTS];
/* Number of slots of convert[] currently in use. */
static int nr_convert;

/* Defined further down; declared here because the converters recurse through it. */
static struct entry * convert_entry(unsigned char *sha1);
20 static struct entry *insert_new(unsigned char *sha1, int pos)
22 struct entry *new = xmalloc(sizeof(struct entry));
23 memset(new, 0, sizeof(*new));
24 memcpy(new->old_sha1, sha1, 20);
25 memmove(convert + pos + 1, convert + pos, (nr_convert - pos) * sizeof(struct entry *));
26 convert[pos] = new;
27 nr_convert++;
28 if (nr_convert == MAXOBJECTS)
29 die("you're kidding me - hit maximum object limit");
30 return new;
33 static struct entry *lookup_entry(unsigned char *sha1)
35 int low = 0, high = nr_convert;
37 while (low < high) {
38 int next = (low + high) / 2;
39 struct entry *n = convert[next];
40 int cmp = memcmp(sha1, n->old_sha1, 20);
41 if (!cmp)
42 return n;
43 if (cmp < 0) {
44 high = next;
45 continue;
47 low = next+1;
49 return insert_new(sha1, low);
52 static void convert_binary_sha1(void *buffer)
54 struct entry *entry = convert_entry(buffer);
55 memcpy(buffer, entry->new_sha1, 20);
58 static void convert_ascii_sha1(void *buffer)
60 unsigned char sha1[20];
61 struct entry *entry;
63 if (get_sha1_hex(buffer, sha1))
64 die("expected sha1, got '%s'", (char*) buffer);
65 entry = convert_entry(sha1);
66 memcpy(buffer, sha1_to_hex(entry->new_sha1), 40);
/*
 * Normalise a tree-entry mode: keep only the file-type bits and, for
 * regular files, set the permissions to 0755 when the owner-execute
 * bit (0100) is set and to 0644 otherwise.  Non-regular entries end up
 * with no permission bits at all.
 */
static unsigned int convert_mode(unsigned int mode)
{
	unsigned int newmode;

	newmode = mode & S_IFMT;
	if (S_ISREG(mode))
		newmode |= (mode & 0100) ? 0755 : 0644;
	return newmode;
}
79 static int write_subdirectory(void *buffer, unsigned long size, const char *base, int baselen, unsigned char *result_sha1)
81 char *new = xmalloc(size);
82 unsigned long newlen = 0;
83 unsigned long used;
85 used = 0;
86 while (size) {
87 int len = 21 + strlen(buffer);
88 char *path = strchr(buffer, ' ');
89 unsigned char *sha1;
90 unsigned int mode;
91 char *slash, *origpath;
93 if (!path || sscanf(buffer, "%o", &mode) != 1)
94 die("bad tree conversion");
95 mode = convert_mode(mode);
96 path++;
97 if (memcmp(path, base, baselen))
98 break;
99 origpath = path;
100 path += baselen;
101 slash = strchr(path, '/');
102 if (!slash) {
103 newlen += sprintf(new + newlen, "%o %s", mode, path);
104 new[newlen++] = '\0';
105 memcpy(new + newlen, buffer + len - 20, 20);
106 newlen += 20;
108 used += len;
109 size -= len;
110 buffer += len;
111 continue;
114 newlen += sprintf(new + newlen, "%o %.*s", S_IFDIR, (int)(slash - path), path);
115 new[newlen++] = 0;
116 sha1 = (unsigned char *)(new + newlen);
117 newlen += 20;
119 len = write_subdirectory(buffer, size, origpath, slash-origpath+1, sha1);
121 used += len;
122 size -= len;
123 buffer += len;
126 write_sha1_file(new, newlen, "tree", result_sha1);
127 free(new);
128 return used;
/*
 * Convert a tree object held in `buffer` (`size` bytes).
 *
 * First pass: walk the entries, each a NUL-terminated "mode path"
 * string followed by a 20-byte binary SHA-1, and rewrite every SHA-1
 * in place to that of the converted object it refers to.  Second pass:
 * hand the whole buffer to write_subdirectory(), which writes the new
 * tree and stores its SHA-1 in `result_sha1`.
 *
 * Dies with "corrupt tree object" if an entry does not fit in what is
 * left of the buffer.  The check is made before the SHA-1 is touched,
 * so a truncated object is never read or written past its end.
 */
static void convert_tree(void *buffer, unsigned long size, unsigned char *result_sha1)
{
	char *cur = buffer;
	unsigned long left = size;

	while (left) {
		char *nul = memchr(cur, '\0', left);
		unsigned long len;

		if (!nul)
			die("corrupt tree object");
		len = (unsigned long)(nul - cur) + 1 + 20;
		if (len > left)
			die("corrupt tree object");

		convert_binary_sha1(nul + 1);

		left -= len;
		cur += len;
	}

	write_subdirectory(buffer, size, "", 0, result_sha1);
}
/*
 * Parse a textual date in the old commit format and return the value
 * mktime() yields for it.
 *
 * Leading whitespace is skipped and the rest of the line (up to the
 * newline, the end of the string, or the capacity of the local buffer,
 * whichever comes first) is copied aside.  The copy is then consumed
 * piecewise by trying the strptime() formats below in order; whenever a
 * format does not match, a known timezone abbreviation is skipped
 * instead.  If text is still left once the formats are exhausted it is
 * reported on stdout and whatever was parsed so far is converted.
 */
static unsigned long parse_oldstyle_date(const char *buf)
{
	char c, *p;
	char buffer[100];
	struct tm tm;
	const char *formats[] = {
		"%c",
		"%a %b %d %T",
		"%Z",
		"%Y",
		" %Y",
		NULL
	};
	/* We only ever did two timezones in the bad old format .. */
	const char *timezones[] = {
		"PDT", "PST", "CEST", NULL
	};
	const char **fmt = formats;

	p = buffer;
	/* <ctype.h> functions need an unsigned char value. */
	while (isspace((unsigned char) *buf))
		buf++;
	/* Bounded copy: never write past buffer[], never read past the NUL. */
	while ((c = *buf++) != '\n' && c != '\0' &&
	       p < buffer + sizeof(buffer) - 1)
		*p++ = c;
	*p = 0;
	buf = buffer;
	memset(&tm, 0, sizeof(tm));
	do {
		const char *next = strptime(buf, *fmt, &tm);
		if (next) {
			if (!*next)
				return mktime(&tm);
			buf = next;
		} else {
			const char **tz = timezones;
			while (isspace((unsigned char) *buf))
				buf++;
			while (*tz) {
				/* strncmp stops at the NUL, unlike memcmp. */
				if (!strncmp(buf, *tz, strlen(*tz))) {
					buf += strlen(*tz);
					break;
				}
				tz++;
			}
		}
		fmt++;
	} while (*buf && *fmt);
	printf("left: %s\n", buf);
	return mktime(&tm);
}
/*
 * Copy one author/committer line from *buf to `dst`, rewriting its
 * date into the new "<seconds> -0700" form unless it already starts
 * with a digit.
 *
 * The date is taken to begin two characters after the first '>' of the
 * line.  On return *buf points at the following line and *sp has been
 * reduced by the length of the consumed line.  Returns the number of
 * bytes written to `dst`.
 *
 * Dies if the line has no newline, or no '>' followed by at least one
 * more character before that newline.
 */
static int convert_date_line(char *dst, void **buf, unsigned long *sp)
{
	unsigned long size = *sp;
	char *line = *buf;
	char *next = strchr(line, '\n');
	char *date = strchr(line, '>');
	char *fallback;
	int len;

	/* The '>' must belong to this line and leave room for the separator. */
	if (!next || !date || date + 2 > next)
		die("missing or bad author/committer line %s", line);
	next++; date += 2;

	*buf = next;
	*sp = size - (next - line);

	len = date - line;
	memcpy(dst, line, len);
	dst += len;

	/* Is it already in new format? */
	if (isdigit((unsigned char) *date)) {
		int datelen = next - date;
		memcpy(dst, date, datelen);
		return len + datelen;
	}

	/*
	 * Hacky hacky: one of the sparse old-style commits does not have
	 * any date at all, but we can fake it by using the committer date.
	 */
	if (*date == '\n' && (fallback = strchr(next, '>')) != NULL)
		date = fallback + 2;

	return len + sprintf(dst, "%lu -0700\n", parse_oldstyle_date(date));
}
/*
 * Write a new commit object from the commit text in `buffer`: the
 * "tree" and "parent" lines are copied verbatim, the author and
 * committer lines go through convert_date_line(), and the remainder is
 * copied unchanged.  The SHA-1 of the written object is stored in
 * `result_sha1`.
 *
 * Dies with "corrupt commit object" if the buffer is too short to hold
 * the fixed-width header lines it claims to have.
 */
static void convert_date(void *buffer, unsigned long size, unsigned char *result_sha1)
{
	/* "tree " + 40 hex + "\n", and "parent " + 40 hex + "\n" */
	enum { TREE_LINE_LEN = 46, PARENT_LINE_LEN = 48 };
	/* 100 bytes of slack for the two rewritten date fields */
	char *new = xmalloc(size + 100);
	unsigned long newlen = 0;
	char *cur = buffer;	/* char pointer: arithmetic on void * is a GNU extension */
	void *rest;

	// "tree <sha1>\n"
	if (size < TREE_LINE_LEN)
		die("corrupt commit object");
	memcpy(new + newlen, cur, TREE_LINE_LEN);
	newlen += TREE_LINE_LEN;
	cur += TREE_LINE_LEN;
	size -= TREE_LINE_LEN;

	// "parent <sha1>\n"
	while (size >= 7 && !memcmp(cur, "parent ", 7)) {
		if (size < PARENT_LINE_LEN)
			die("corrupt commit object");
		memcpy(new + newlen, cur, PARENT_LINE_LEN);
		newlen += PARENT_LINE_LEN;
		cur += PARENT_LINE_LEN;
		size -= PARENT_LINE_LEN;
	}

	rest = cur;
	// "author xyz <xyz> date"
	newlen += convert_date_line(new + newlen, &rest, &size);
	// "committer xyz <xyz> date"
	newlen += convert_date_line(new + newlen, &rest, &size);

	// Rest
	memcpy(new + newlen, rest, size);
	newlen += size;

	write_sha1_file(new, newlen, "commit", result_sha1);
	free(new);
}
/*
 * Convert a commit object held in `buffer` (`size` bytes).
 *
 * The hex SHA-1 on the "tree" line and on every "parent" line is
 * rewritten in place to that of the converted object; convert_date()
 * then rewrites the date fields, writes the new commit and stores its
 * SHA-1 in `result_sha1`.
 *
 * Dies with "Bad commit" if the object does not start with a complete
 * "tree" line or contains a truncated "parent" line.
 */
static void convert_commit(void *buffer, unsigned long size, unsigned char *result_sha1)
{
	/* "tree " + 40 hex + "\n", and "parent " + 40 hex + "\n" */
	enum { TREE_LINE_LEN = 46, PARENT_LINE_LEN = 48 };
	char *cur = buffer;	/* char pointer: arithmetic on void * is a GNU extension */
	unsigned long left = size;

	if (left < TREE_LINE_LEN || memcmp(cur, "tree ", 5))
		die("Bad commit '%s'", (char*) buffer);
	convert_ascii_sha1(cur + 5);
	cur += TREE_LINE_LEN;
	left -= TREE_LINE_LEN;
	while (left >= 7 && !memcmp(cur, "parent ", 7)) {
		if (left < PARENT_LINE_LEN)
			die("Bad commit '%s'", (char*) buffer);
		convert_ascii_sha1(cur + 7);
		cur += PARENT_LINE_LEN;
		left -= PARENT_LINE_LEN;
	}
	convert_date(buffer, size, result_sha1);
}
286 static struct entry * convert_entry(unsigned char *sha1)
288 struct entry *entry = lookup_entry(sha1);
289 char type[20];
290 void *buffer, *data;
291 unsigned long size;
293 if (entry->converted)
294 return entry;
295 data = read_sha1_file(sha1, type, &size);
296 if (!data)
297 die("unable to read object %s", sha1_to_hex(sha1));
299 buffer = xmalloc(size);
300 memcpy(buffer, data, size);
302 if (!strcmp(type, "blob")) {
303 write_sha1_file(buffer, size, "blob", entry->new_sha1);
304 } else if (!strcmp(type, "tree"))
305 convert_tree(buffer, size, entry->new_sha1);
306 else if (!strcmp(type, "commit"))
307 convert_commit(buffer, size, entry->new_sha1);
308 else
309 die("unknown object type '%s' in %s", type, sha1_to_hex(sha1));
310 entry->converted = 1;
311 free(buffer);
312 free(data);
313 return entry;
316 int main(int argc, char **argv)
318 unsigned char sha1[20];
319 struct entry *entry;
321 if (argc != 2 || get_sha1(argv[1], sha1))
322 usage("git-convert-objects <sha1>");
324 entry = convert_entry(sha1);
325 printf("new sha1: %s\n", sha1_to_hex(entry->new_sha1));
326 return 0;