remote-bzr: improve author sanitization
[git.git] / contrib / remote-helpers / git-remote-bzr
blobaf46016dc9488488bbc85530618e8a61926cfb47
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
30 import bzrlib.urlutils
32 import sys
33 import os
34 import json
35 import re
36 import StringIO
37 import atexit, shutil, hashlib, urlparse, subprocess
# Patterns used to pick a name and an e-mail address out of free-form
# bzr committer/author strings, tried from strictest to loosest.

# A leading run of characters up to the first angle bracket.
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <mail>" where the name is optional and the whole string must match.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# An optional name followed by a bare token that contains an '@'.
EMAIL_RE = re.compile('^([^<>]+[^ \\\t<>])?\\b(?:[ \\t<>]*?)\\b([^ \\t<>]+@[^ \\t<>]+)')
# A fast-import ident line: "<command> <name> <<mail>> <epoch> <+hhmm|-hhmm>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit the process with status 1.

    msg is a printf-style template interpolated with args.  When no args are
    given, msg is written verbatim: callers in this file pass messages they
    have already formatted, and such a message may contain a literal '%'
    (from a path or an input line) that must not be interpreted again.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr.

    msg is a printf-style template interpolated with args.  Without args the
    message is written verbatim, so a literal '%' in an already formatted
    message does not raise a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format a UTC offset in seconds as a git timezone string ("+hhmm").

    The sign is handled separately from the magnitude.  Floor division and
    modulo on a negative offset round the hours away from zero and yield
    complementary minutes, which turned e.g. -03:30 into "-0430".
    """
    sign = '-' if tz < 0 else '+'
    hours, seconds = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
class Marks:
    """Table linking bzr revision ids and fast-import marks, kept as JSON.

    Attributes:
      path       file the table is read from and written to
      tips       branch name -> revision id recorded by set_tip()
      marks      revision id -> mark number
      rev_marks  mark number -> revision id (filled by load() and new_mark())
      last_mark  the mark number most recently issued or recorded
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Populate the table from self.path; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file explicitly rather than leaving it to the collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # The reverse mapping is not stored; rebuild it from marks.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the serialisable state (tips, marks, last-mark)."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the table to self.path as JSON."""
        # 'with' guarantees the data is flushed and the handle closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of revision rev; KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision recorded for mark; KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Issue a fresh mark number that is not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Issue a fresh mark number and record it as the mark of rev."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether rev already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev, in both directions."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of branch, or None if there is none."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record tip as the tip of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    self.line always holds the current, whitespace-stripped input line; the
    helper methods below interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until separator."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block of commands is terminated by an empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data command, otherwise
        exactly <size> characters read from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into ('Name <mail>', epoch, offset_seconds).

        Returns None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # Convert "+hhmm"/"-hhmm" using the magnitude and applying the sign
        # afterwards; floor division on the signed number mangles negative
        # offsets that are not whole hours (e.g. -0330).
        sign = -1 if tz.startswith('-') else 1
        hours, minutes = divmod(int(tz[1:]), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (committer, int(date), tz)
def rev_to_mark(rev):
    """Look up the fast-import mark of bzr revision rev in the global marks.

    Delegates to marks.from_rev(), so an unmarked revision raises KeyError.
    """
    return marks.from_rev(rev)
def mark_to_rev(mark):
    """Look up the bzr revision recorded for mark in the global marks.

    Delegates to marks.to_rev(), so an unknown mark raises KeyError.
    """
    return marks.to_rev(mark)
def fixup_user(user):
    """Turn a free-form bzr user string into a 'Name <mail>' ident.

    Double quotes are dropped, then the string is matched against
    AUTHOR_RE, EMAIL_RE and NAME_RE in that order; the first match supplies
    the parts.  A missing name becomes 'unknown' and a missing mail becomes
    'Unknown'.
    """
    cleaned = user.replace('"', '')
    name = mail = None

    bracketed = AUTHOR_RE.match(cleaned)
    if bracketed:
        name, mail = bracketed.group(1), bracketed.group(2).strip()
    else:
        bare = EMAIL_RE.match(cleaned)
        if bare:
            name, mail = bare.group(1), bare.group(2)
        else:
            name_only = NAME_RE.match(cleaned)
            if name_only:
                name = name_only.group(1).strip()

    return '%s <%s>' % (name or 'unknown', mail or 'Unknown')
def get_filechanges(cur, prev):
    """Summarise the differences between two trees for fast-import.

    Returns (modified, removed): modified maps utf-8 encoded paths to file
    ids for content to (re)export, removed maps utf-8 encoded paths to None
    for paths to delete.  A rename is reported as removal of the old path
    plus export of the new one; for a renamed directory every
    non-directory entry listed below the new path is exported.
    """
    delta = cur.changes_from(prev)
    modified = {}
    removed = {}

    for path, file_id, _kind in delta.added:
        modified[path.encode('utf-8')] = file_id

    for path, file_id, _kind in delta.removed:
        removed[path.encode('utf-8')] = None

    for path, file_id, _kind, _text_mod, _meta_mod in delta.modified:
        modified[path.encode('utf-8')] = file_id

    for old, new, file_id, kind, _text_mod, _meta_mod in delta.renamed:
        removed[old.encode('utf-8')] = None
        if kind != 'directory':
            modified[new.encode('utf-8')] = file_id
            continue
        listing = cur.list_files(from_dir=new, recursive=True)
        for sub, _file_class, sub_kind, sub_id, _entry in listing:
            if sub_kind != 'directory':
                modified[(new + '/' + sub).encode('utf-8')] = sub_id

    return modified, removed
222 def export_files(tree, files):
223 global marks, filenodes
225 final = []
226 for path, fid in files.iteritems():
227 kind = tree.kind(fid)
229 h = tree.get_file_sha1(fid)
231 if kind == 'symlink':
232 d = tree.get_symlink_target(fid)
233 mode = '120000'
234 elif kind == 'file':
236 if tree.is_executable(fid):
237 mode = '100755'
238 else:
239 mode = '100644'
241 # is the blob already exported?
242 if h in filenodes:
243 mark = filenodes[h]
244 final.append((mode, mark, path))
245 continue
247 d = tree.get_file_text(fid)
248 elif kind == 'directory':
249 continue
250 else:
251 die("Unhandled kind '%s' for path '%s'" % (kind, path))
253 mark = marks.next_mark()
254 filenodes[h] = mark
256 print "blob"
257 print "mark :%u" % mark
258 print "data %d" % len(d)
259 print d
261 final.append((mode, mark, path))
263 return final
265 def export_branch(repo, name):
266 global prefix
268 ref = '%s/heads/%s' % (prefix, name)
269 tip = marks.get_tip(name)
271 branch = branches[name]
272 repo = branch.repository
274 branch.lock_read()
275 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
276 count = 0
278 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
280 for revid in revs:
282 rev = repo.get_revision(revid)
284 parents = rev.parent_ids
285 time = rev.timestamp
286 tz = rev.timezone
287 committer = rev.committer.encode('utf-8')
288 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
289 authors = rev.get_apparent_authors()
290 if authors:
291 author = authors[0].encode('utf-8')
292 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
293 else:
294 author = committer
295 msg = rev.message.encode('utf-8')
297 msg += '\n'
299 if len(parents) == 0:
300 parent = bzrlib.revision.NULL_REVISION
301 else:
302 parent = parents[0]
304 cur_tree = repo.revision_tree(revid)
305 prev = repo.revision_tree(parent)
306 modified, removed = get_filechanges(cur_tree, prev)
308 modified_final = export_files(cur_tree, modified)
310 if len(parents) == 0:
311 print 'reset %s' % ref
313 print "commit %s" % ref
314 print "mark :%d" % (marks.get_mark(revid))
315 print "author %s" % (author)
316 print "committer %s" % (committer)
317 print "data %d" % (len(msg))
318 print msg
320 for i, p in enumerate(parents):
321 try:
322 m = rev_to_mark(p)
323 except KeyError:
324 # ghost?
325 continue
326 if i == 0:
327 print "from :%s" % m
328 else:
329 print "merge :%s" % m
331 for f in removed:
332 print "D %s" % (f,)
333 for f in modified_final:
334 print "M %s :%u %s" % f
335 print
337 count += 1
338 if (count % 100 == 0):
339 print "progress revision %s '%s' (%d/%d)" % (revid, name, count, len(revs))
340 print "#############################################################"
342 branch.unlock()
344 revid = branch.last_revision()
346 # make sure the ref is updated
347 print "reset %s" % ref
348 print "from :%u" % rev_to_mark(revid)
349 print
351 marks.set_tip(name, revid)
353 def export_tag(repo, name):
354 global tags, prefix
356 ref = '%s/tags/%s' % (prefix, name)
357 print "reset %s" % ref
358 print "from :%u" % rev_to_mark(tags[name])
359 print
361 def do_import(parser):
362 global dirname
364 repo = parser.repo
365 path = os.path.join(dirname, 'marks-git')
367 print "feature done"
368 if os.path.exists(path):
369 print "feature import-marks=%s" % path
370 print "feature export-marks=%s" % path
371 print "feature force"
372 sys.stdout.flush()
374 while parser.check('import'):
375 ref = parser[1]
376 if ref.startswith('refs/heads/'):
377 name = ref[len('refs/heads/'):]
378 export_branch(repo, name)
379 if ref.startswith('refs/tags/'):
380 name = ref[len('refs/tags/'):]
381 export_tag(repo, name)
382 parser.next()
384 print 'done'
386 sys.stdout.flush()
def parse_blob(parser):
    """Consume a 'blob' command and keep its payload in blob_marks.

    Expects the blob line to be followed by a mark line and a data line;
    the payload is stored under the integer mark.
    """
    parser.next()                   # blob -> mark line
    blob_mark = parser.get_mark()
    parser.next()                   # mark -> data line
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
class CustomTree():
    """In-memory tree describing one incoming commit for the commit builder.

    Built from the first parent's file list (cached per revision in the
    global files_cache) plus the file commands of the commit being parsed.

    Attributes:
      branch      the bzr branch being committed to
      base_id     first parent revision id, or NULL_REVISION without parents
      base_files  path -> [file_id, mark] of the base revision
      files       path -> [file_id, mark] after this commit (also cached)
      rev_files   file_id -> [path, mark]
      updates     file_id -> file command dict ('path' plus either
                  'deleted' or 'mode' and 'mark')
    """

    def __init__(self, branch, revid, parents, files):
        global files_cache

        self.updates = {}
        self.branch = branch

        def copy_tree(revid):
            # List every entry of an existing revision and cache the result.
            files = files_cache[revid] = {}
            branch.lock_read()
            try:
                # Inside the try so the lock is released if this fails.
                tree = branch.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                branch.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                # Path unknown in the base revision: it needs a new file id.
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        """Return the revision id this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for the pending updates.

        Each tuple starts with the file id followed by (old, new) pairs.
        The layout is presumably the one bzrlib's record_iter_changes()
        expects (paths, changed flag, versioned, parent id, name, kind,
        executable) -- confirm against the bzrlib documentation.
        """
        changes = []

        def get_parent(dirname, basename):
            # Find the file id of directory dirname, creating the directory
            # entry (and, through add_entry, its ancestors) when missing.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # Derive the new directory's id from the directory itself, not
            # from whichever file path the enclosing loop is processing.
            fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # Remember which blob holds the new content of this entry.
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_content(self, file_id):
        """Return the content of file_id for this commit.

        Uses the blob recorded for the entry when it has a mark, otherwise
        reads the file text from the base revision's tree.
        """
        path, mark = self.rev_files[file_id]
        if mark:
            return blob_marks[mark]

        # last resort
        tree = self.branch.repository.revision_tree(self.base_id)
        return tree.get_file_text(file_id)

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object with the content, None)."""
        content = self.get_content(file_id)
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the link target, which is stored as the entry's content."""
        return self.get_content(file_id)

    def id2path(self, file_id):
        """Return the path recorded for file_id."""
        path, mark = self.rev_files[file_id]
        return path
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string wrapped in double quotes is unescaped and returned without the
    quotes; anything else is returned unchanged.  The length check keeps an
    empty string from raising IndexError and keeps a lone '"' from being
    treated as an opening and closing quote at once.
    """
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one 'commit' command and create the matching bzr revision.

    Reads the mark, ident lines, message, parents and file commands from
    parser, builds the revision on the branch named by the ref through a
    commit builder, then records the new revision id in parsed_refs and in
    the global marks under the commit's mark.  Only refs/heads/ refs are
    accepted; anything else is fatal.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref.startswith('refs/heads/'):
        name = ref[len('refs/heads/'):]
        branch = branches[name]
    else:
        die('unknown ref')

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is skipped: only the committer is passed on below.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith also copes with an empty message, where data[-1] would fail)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = branch.nick

    mtree = CustomTree(branch, revid, parents, files)
    changes = mtree.iter_changes()

    branch.lock_write()
    try:
        builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Discard the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        branch.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume a 'reset <ref>' command.

    When the reset is directly followed by a commit, that commit is parsed
    instead.  When it is followed by a 'from' line, the ref is recorded in
    parsed_refs as pointing at the revision of that mark.  Otherwise
    nothing is recorded.
    """
    ref = parser[1]
    parser.next()

    if parser.check('commit'):
        # The reset only introduces the commit that follows it.
        parse_commit(parser)
    elif parser.check('from'):
        target = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target)
649 def do_export(parser):
650 global parsed_refs, dirname
652 parser.next()
654 for line in parser.each_block('done'):
655 if parser.check('blob'):
656 parse_blob(parser)
657 elif parser.check('commit'):
658 parse_commit(parser)
659 elif parser.check('reset'):
660 parse_reset(parser)
661 elif parser.check('tag'):
662 pass
663 elif parser.check('feature'):
664 pass
665 else:
666 die('unhandled export command: %s' % line)
668 for ref, revid in parsed_refs.iteritems():
669 name = ref[len('refs/heads/'):]
670 branch = branches[name]
671 branch.generate_revision_history(revid, marks.get_tip(name))
673 if name in peers:
674 peer = peers[name]
675 try:
676 peer.bzrdir.push_branch(branch, revision_id=revid)
677 except bzrlib.errors.DivergedBranches:
678 print "error %s non-fast forward" % ref
679 continue
681 try:
682 wt = branch.bzrdir.open_workingtree()
683 wt.update()
684 except bzrlib.errors.NoWorkingTree:
685 pass
687 print "ok %s" % ref
689 print
691 def do_capabilities(parser):
692 global dirname
694 print "import"
695 print "export"
696 print "refspec refs/heads/*:%s/heads/*" % prefix
697 print "refspec refs/tags/*:%s/tags/*" % prefix
699 path = os.path.join(dirname, 'marks-git')
701 if os.path.exists(path):
702 print "*import-marks %s" % path
703 print "*export-marks %s" % path
705 print
def ref_is_valid(name):
    """Tell whether name is free of the characters '~', '^', ':', ' ', '\\'."""
    for forbidden in '~^: \\':
        if forbidden in name:
            return False
    return True
710 def do_list(parser):
711 global tags
713 master_branch = None
715 for name in branches:
716 if not master_branch:
717 master_branch = name
718 print "? refs/heads/%s" % name
720 branch = branches[master_branch]
721 branch.lock_read()
722 for tag, revid in branch.tags.get_tag_dict().items():
723 try:
724 branch.revision_id_to_dotted_revno(revid)
725 except bzrlib.errors.NoSuchRevision:
726 continue
727 if not ref_is_valid(tag):
728 continue
729 print "? refs/tags/%s" % tag
730 tags[tag] = revid
731 branch.unlock()
733 print "@refs/heads/%s HEAD" % master_branch
734 print
def get_remote_branch(origin, remote_branch, name):
    """Return a local copy of remote_branch kept at <dirname>/clone/<name>.

    The copy is created by sprouting from origin on first use and updated
    by pulling afterwards.  When the pull reports diverged branches the
    remote branch itself is returned instead.
    """
    local_path = os.path.join(dirname, 'clone', name)

    if not os.path.exists(local_path):
        # clone
        control_dir = origin.sprout(local_path, None,
                hardlink=True, create_tree_if_local=False,
                force_new_repo=False,
                source_branch=remote_branch)
        return control_dir.open_branch()

    # pull
    local_branch = bzrlib.bzrdir.BzrDir.open(local_path).open_branch()
    try:
        local_branch.pull(remote_branch, [], None, False)
    except bzrlib.errors.DivergedBranches:
        # use remote branch for now
        return remote_branch
    return local_branch
def get_repo(url, alias):
    """Open the bzr location at url and register its branches.

    Makes sure a shared repository exists at <gitdir>/bzr and, for
    non-local locations, that <dirname>/clone exists.  Fills the global
    branches (and peers for non-local locations): a plain branch is
    registered as 'master'; for a repository every branch found is
    registered under its relative path with '/' replaced by '+' ('master'
    when that path is empty).  Returns the repository.  alias is unused.
    """
    normal_url = bzrlib.urlutils.normalize_url(url)
    origin = bzrlib.bzrdir.BzrDir.open(url)
    is_local = isinstance(origin.transport, bzrlib.transport.local.LocalTransport)

    def register(branch_name, bzr_branch):
        # Local branches are used directly; remote ones are remembered as
        # peers and accessed through a local copy.
        if is_local:
            branches[branch_name] = bzr_branch
        else:
            peers[branch_name] = bzr_branch
            branches[branch_name] = get_remote_branch(origin, bzr_branch, branch_name)

    shared_path = os.path.join(gitdir, 'bzr')
    try:
        shared_dir = bzrlib.bzrdir.BzrDir.open(shared_path)
    except bzrlib.errors.NotBranchError:
        shared_dir = bzrlib.bzrdir.BzrDir.create(shared_path)
    try:
        shared_repo = shared_dir.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        shared_repo = shared_dir.create_repository(shared=True)

    if not is_local:
        clone_path = os.path.join(dirname, 'clone')
        if not os.path.exists(clone_path):
            os.mkdir(clone_path)

    try:
        repo = origin.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        # branch
        branch = origin.open_branch()
        register('master', branch)
        return branch.repository

    # repository
    for branch in repo.find_branches():
        name = repo.user_transport.relpath(branch.base)
        if name == '':
            name = 'master'
        register(name.replace('/', '+'), branch)

    return repo
def fix_path(alias, orig_url):
    """Rewrite a relative file url of remote alias as an absolute one.

    Does nothing unless orig_url parses as a 'file' url with a relative
    path.  Otherwise the url is joined onto the current working directory
    and stored, with the 'bzr::' prefix, as remote.<alias>.url via
    'git config'.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    if parsed.scheme == 'file' and not os.path.isabs(parsed.path):
        base = "%s/" % os.getcwd()
        abs_url = urlparse.urljoin(base, orig_url)
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url])
def main(args):
    """Run the remote helper: set up global state, then serve commands.

    args[1] is the remote alias and args[2] the url.  When the alias minus
    its first five characters equals the url, the remote is treated as
    temporary and its sha1 hex digest is used as the alias.  Commands read
    from stdin are dispatched until an empty line; an unknown command is
    fatal.
    """
    global marks, prefix, gitdir, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp
    global branches, peers

    alias, url = args[1], args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None
    branches = {}
    peers = {}

    is_tmp = alias[5:] == url
    if is_tmp:
        alias = hashlib.sha1(alias).hexdigest()

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
def bye():
    """Exit hook: save the marks, or drop a temporary remote's directory.

    Does nothing while the global marks is still unset.
    """
    if marks:
        if is_tmp:
            shutil.rmtree(dirname)
        else:
            marks.store()
# Run only when executed as a script, so that loading this file as a module
# neither registers the exit hook nor starts reading commands from stdin.
if __name__ == '__main__':
    atexit.register(bye)
    sys.exit(main(sys.argv))