Revert "... and stop calling the blobtracker"
[trackgit.git] / parsethread.py
blobae6a1cec5ab8443e181539373840621632ca74bf
1 #!/usr/bin/python
3 import sys
4 import re
5 import mailbox
6 import email.Iterators
7 import email.Parser
8 from collections import defaultdict
# Matches one angle-bracketed token such as "<id@host>"; group 1 is the
# text between the brackets.
_msg_id_regex = re.compile(r'<([^<>]+)>')


def parse_msg_id(str):
    """Return the first ``<...>``-delimited id found in ``str``.

    The surrounding angle brackets are not part of the result.  When
    ``str`` contains no such token the search yields ``None`` and the
    attribute access on it raises ``AttributeError``.
    """
    first_match = _msg_id_regex.search(str)
    return first_match.group(1)
# Load every message of the mbox named by the first command-line argument,
# letting the email parser build the message objects.
parser = email.Parser.Parser()
mbox = mailbox.mbox(sys.argv[1], parser.parse)
mbox_parsed = list(mbox)

# Two-way index between messages and their Message-Id:
#   by_id:  message id -> message object
#   id_map: message object -> message id
by_id, id_map = {}, {}
for msg in mbox_parsed:
    msgid = parse_msg_id(msg['Message-Id'])
    by_id[msgid], id_map[msg] = msg, msgid
def detect_reply_id(msg):
    """Return the id of the message that ``msg`` replies to, or ``None``.

    The In-Reply-To header is consulted first.  If it is absent, or does
    not contain an angle-bracketed id, the last id listed in the
    References header(s) is used instead.  ``None`` is returned when
    neither header provides an id.
    """
    in_reply_to = msg['In-Reply-To']
    if in_reply_to:
        found = _msg_id_regex.search(in_reply_to)
        # An In-Reply-To without any "<id>" token used to crash here; fall
        # through to References instead.
        if found:
            return found.group(1)

    references = msg.get_all('References')
    if references:
        # With a single group, findall() returns the bracket contents.
        ref_ids = _msg_id_regex.findall(' '.join(references))
        # Guard against a References header that holds no "<id>" at all
        # (indexing [-1] on the empty list raised IndexError).
        if ref_ids:
            return ref_ids[-1]

    return None
# Build the thread structure in one pass over the mbox:
#   parents:  message -> the message it replies to
#   children: message -> list of direct replies, in mbox order
#   root:     the last message seen that has no parent inside this mbox
parents = {}
children = defaultdict(list)
root = None
for msg in mbox_parsed:
    parent_id = detect_reply_id(msg)
    parent = None
    if parent_id:
        # The referenced message may not be part of this mbox; .get()
        # avoids the KeyError a plain lookup would raise in that case.
        parent = by_id.get(parent_id)
    if parent is None:
        # No reply header, or the parent is missing from the mbox: treat
        # the message as a thread root.
        root = msg
    else:
        parents[msg] = parent
        # Filling children here (rather than from iterating the parents
        # dict afterwards) keeps replies in mbox order, independent of
        # dict iteration order.
        children[parent].append(msg)
def get_text_payload(msg):
    """Return the text payload of ``msg`` as a string.

    A non-multipart message simply yields its payload.  For a multipart
    message, every non-multipart part whose main content type is ``text``
    is considered and the longest payload among them is returned.  An
    empty string is returned when the message has no such part.

    Payloads are returned as stored (``get_payload()`` without decoding).
    """
    if not msg.is_multipart():
        return msg.get_payload()

    # walk() plus a maintype test is the same filter that
    # typed_subpart_iterator(msg) applies with its default arguments.
    text_payloads = [part.get_payload() for part in msg.walk()
                     if part.get_content_maintype() == 'text'
                     and not part.is_multipart()]
    if not text_payloads:
        # max() on an empty sequence would raise ValueError.
        return ''
    # Compare the payload strings themselves: len() of a message object
    # counts its headers, not the size of its text.
    return max(text_payloads, key=len)
# Text that has a line consisting solely of "---" followed, somewhere later,
# by a line starting with "diff --git".
_format_patch_regex = re.compile('.*^---$.*^diff --git', re.MULTILINE|re.DOTALL)
# Text with a scissors line (dashes around "8<" or ">8"); group 1 captures
# what follows that line, which must contain a "diff --git" line.
_snip_patch_regex = re.compile('.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
                               re.MULTILINE|re.DOTALL)


def guess_patch_contents(msg):
    """Return ``msg`` rendered as a string if it appears to carry a patch.

    The text payload is tried against the two patterns above, in that
    order.  On a hit, the payload of ``msg`` is replaced in place (by the
    whole text, or by the part after the scissors line) and the message is
    returned via ``as_string()``.  ``None`` is returned, and ``msg`` is
    left untouched, when neither pattern matches.
    """
    text = get_text_payload(msg)
    if _format_patch_regex.match(text):
        patch_body = text
    else:
        snipped = _snip_patch_regex.match(text)
        if not snipped:
            # no patch found
            return None
        patch_body = snipped.group(1)
    msg.set_payload(patch_body)
    return msg.as_string()
def recurse_thread(msg):
    """Yield ``msg`` followed by all of its descendants, depth first.

    Each message is produced before its replies, and the replies of one
    message are visited in the order they appear in the module-level
    ``children`` mapping.
    """
    pending = [msg]
    while pending:
        current = pending.pop()
        yield current
        # Push replies reversed so the first reply is popped (and thus
        # yielded) first, matching a pre-order traversal.
        pending.extend(reversed(children[current]))
77 for msg in recurse_thread(root):
78 p = guess_patch_contents(msg)
79 if p:
80 print id_map[msg]
81 if msg in parents:
82 print 'Parent:', id_map[parents[msg]]