8 from collections
import defaultdict
# Captures the text between the first pair of angle brackets, which is how
# message ids are written in Message-Id / In-Reply-To / References headers.
_msg_id_regex = re.compile(r'<([^<>]+)>')


def parse_msg_id(str):
    """Return the first ``<...>`` message id found in a header value.

    Parameters:
        str: the raw header value.  May be None, which is what an email
            message returns for a header that is absent.

    Returns:
        The id without its surrounding angle brackets, or None when the
        header is missing, empty, or contains no bracketed id.  Callers
        should treat None as "no id available".
    """
    # The parameter keeps its original name (even though it shadows the
    # builtin) so that existing keyword callers continue to work.
    if not str:
        return None
    found = _msg_id_regex.search(str)
    if found is None:
        # Previously this case died with an opaque
        # "'NoneType' object has no attribute 'group'".
        return None
    return found.group(1)
# Open the mbox file named by the first command-line argument and parse
# every message up front; the resulting list is iterated more than once
# further down.
parser = email.Parser.Parser()
# parser.parse is handed to mailbox.mbox as its message factory, so each
# entry read from the mbox is built by this parser.
mbox = mailbox.mbox(sys.argv[1], parser.parse)
mbox_parsed = list(mbox)
# Extract the id from the Message-Id header of every parsed message.
# NOTE(review): only these two lines of the loop are visible in this view;
# what is done with msgid afterwards is not shown -- confirm against the
# full file.
for msg in mbox_parsed:
    msgid = parse_msg_id(msg['Message-Id'])
def detect_reply_id(msg):
    """Work out the id of the message that *msg* replies to.

    A non-empty In-Reply-To header takes precedence: its id is returned via
    parse_msg_id().  Otherwise every References header is joined into one
    string and all ``<...>`` ids found in it are collected into ``ref_ids``.

    NOTE(review): the visible code stops after building ``ref_ids``; nothing
    shown here returns or uses it, and the source numbering suggests that
    lines are missing from this view (possibly including a guard before the
    References handling, since get_all() is joined unconditionally here).
    Confirm against the full file.
    """
    if msg['In-Reply-To']:
        return parse_msg_id(msg['In-Reply-To'])
    # A message can carry several References headers; joining them lets a
    # single regex scan pick up every id.
    refs = ' '.join(msg.get_all('References'))
    ref_ids = [m.group(1) for m in _msg_id_regex.finditer(refs)]
# For every message, look up the message it replies to: the id returned by
# detect_reply_id() is resolved through by_id and stored in parents, keyed
# by the replying message.
# NOTE(review): the source numbering skips a line between these two
# statements, so the assignment may be guarded in the full file (e.g. for
# messages without a known parent) -- confirm.
for msg in mbox_parsed:
    parent_id = detect_reply_id(msg)
    parents[msg] = by_id[parent_id]
# Invert the child -> parent mapping into parent -> [children].  A
# defaultdict is used so that looking up a message with no replies yields an
# empty list instead of raising KeyError.
children = defaultdict(list)
for msg in parents:
    parent = parents[msg]
    children[parent].append(msg)
def get_text_payload(msg):
    """Return the text payload of an email message.

    Parameters:
        msg: an email message object.

    Returns:
        For a non-multipart message, its payload as returned by
        get_payload().  For a multipart message, the payload of the selected
        text/* subpart, or an empty string when the message has no text/*
        subpart at all.
    """
    # Imported locally under its lowercase name: the capitalised
    # ``email.Iterators`` alias exists only on Python 2, whereas
    # ``email.iterators`` is available on Python 2.5+ and Python 3.
    from email.iterators import typed_subpart_iterator

    if not msg.is_multipart():
        return msg.get_payload()

    text_parts = list(typed_subpart_iterator(msg))
    if not text_parts:
        # Attachment-only mail: max() on an empty sequence would raise
        # ValueError, so report "no text" explicitly instead.
        return ''

    # NOTE(review): len() of a message object is its number of headers, not
    # the size of its body, so this picks the text part with the most
    # headers (the first one on ties).  Kept as-is to preserve which part is
    # selected -- confirm whether "longest body" was intended.
    textpart = max(text_parts, key=len)
    if textpart.is_multipart():
        return textpart.get_payload(0)
    return textpart.get_payload()
# Used with .match(), so it is anchored at the start of the text.  With
# DOTALL the leading ".*" spans lines; the pattern requires a line consisting
# solely of "---" followed, somewhere later, by a line starting with
# "diff --git".  Presumably this is the layout of a git format-patch mail,
# going by the name.
_format_patch_regex = re.compile('.*^---$.*^diff --git', re.MULTILINE|re.DOTALL)
# Requires a scissors line: dashes, an optional space, "8<" or ">8", an
# optional space, then dashes up to the end of the line.  Group 1 captures
# everything after that line and must itself contain a line starting with
# "diff --git".  Because the leading ".*" is greedy, the last scissors line
# that still leaves a "diff --git" line after it is the one used.
_snip_patch_regex = re.compile('.*^-+ ?(?:8<|>8) ?-+\n(.*^diff --git.*)',
                               re.MULTILINE|re.DOTALL)
def guess_patch_contents(msg):
    """Return *msg* serialised as a string if its text appears to hold a patch.

    The text payload is tested against _format_patch_regex first; on a match
    the whole message is returned unchanged via as_string().  Otherwise
    _snip_patch_regex is tried and the message payload is replaced with the
    text captured after the scissors line before serialising.

    Side effect: in the scissors case *msg* itself is modified through
    set_payload().

    NOTE(review): in the lines visible here ``m`` is used without checking
    for a failed match.  The source numbering skips lines inside this
    function, so a guard (and a fall-through return for "not a patch") may
    exist outside this view -- confirm against the full file.
    """
    p = get_text_payload(msg)
    if _format_patch_regex.match(p):
        return msg.as_string()
    m = _snip_patch_regex.match(p)
    # Keep only the part after the scissors line as the message body.
    msg.set_payload(m.group(1))
    return msg.as_string()
71 def recurse_thread(msg
):
73 for child
in children
[msg
]:
74 for m
in recurse_thread(child
):
77 for msg
in recurse_thread(root
):
78 p
= guess_patch_contents(msg
)
82 print 'Parent:', id_map
[parents
[msg
]]