Cleanups: dead code removal/etc.
[greylag.git] / greylag_merge.py
blobb3a48c23031707456452f24f7109694133dca6b2
1 #!/usr/bin/env python2.5
3 '''Merge a set of greylag search result files, producing a search result file
4 that summarizes the best matches found.
6 '''
8 from __future__ import with_statement
10 __copyright__ = '''
11 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 '''
28 __version__ = "0.0"
31 import contextlib
32 import cPickle
33 import gzip
34 import optparse
35 import os.path
36 from pprint import pprint
37 import sys
40 def warn(s):
41 print >> sys.stderr, 'warning:', s
42 def error(s):
43 sys.exit('error: ' + s)
46 def check_consistency(r0, r1):
47 """Warn on any inconsistencies between the two results that are about to
48 be merged."""
50 if r0['version'] != r1['version']:
51 warn("merging runs searched with different greylag versions!"
52 " (rerunning with same version strongly recommended)")
53 k0 = set(r0.keys())
54 k1 = set(r1.keys())
55 if k0 != k1:
56 warn("mismatched keys: %s" % (k0 ^ k1))
58 varying_keys = set(['matches', 'total comparisons', 'argv'])
59 for k in k0 - varying_keys:
60 if r0[k] != r1.get(k):
61 warn("search context differs: %s (%s vs %s)" % (k, r0[k], r1[k]))
64 def score_equal(s1, s2):
65 return abs(s1-s2) < 1e-6
68 def merge_match(m0, m1):
69 """Merge a particular spectrum match from m1 into m0."""
70 m0[0]['comparisons'] += m1[0]['comparisons']
72 # NB: This merge code, and score_equal above, must functionally match the
73 # merge code in cgreylag.cpp:evaluate_peptide!
75 keep = max(len(m0), len(m1))
76 assert keep >= 1
77 matches = m0[1] + m1[1]
78 matches.sort(key=lambda x: x['score'], reverse=True)
80 merged_matches = [matches.pop()]
81 while len(merged_matches) < keep and matches:
82 c0, c1 = merged_matches[-1], matches.pop()
83 if (score_equal(c0['score'], c1['score'])
84 and c0['peptide_sequence'] == c1['peptide_sequence']
85 and c0['mass_trace'] == c1['mass_trace']):
86 continue
87 merged_matches.append(c1)
90 def merge_matches(m0, m1):
91 """Merge the match information in m1 into m0."""
92 for m1k, m1v in m1.iteritems():
93 if m1k not in m0:
94 m0[m1k] = m1v
95 else:
96 merge_match(m0[m1k], m1v)
99 def main(args=sys.argv[1:]):
100 parser = optparse.OptionParser(usage=
101 "usage: %prog [options] <result-file>..."
102 " <output-result-file>",
103 description=__doc__, version=__version__)
104 pa = parser.add_option
105 pa("-v", "--verbose", action="store_true", dest="verbose",
106 help="be verbose")
107 pa("--copyright", action="store_true", dest="copyright",
108 help="print copyright and exit")
109 (options, args) = parser.parse_args(args=args)
111 if options.copyright:
112 print __copyright__
113 sys.exit(0)
115 if len(args) < 2:
116 parser.print_help()
117 sys.exit(1)
119 result_fn_0 = args[0]
120 result_fn_1_N = args[1:-1]
121 output_fn = args[-1]
123 # spectrum name -> match list/spectrum info
124 matches = {}
125 with contextlib.closing(gzip.open(result_fn_0)) as r_file:
126 r0 = cPickle.load(r_file)
127 matches = r0['matches']
128 total_comparisons = r0['total comparisons']
129 if options.verbose:
130 print >> sys.stderr, "loaded", result_fn_0
132 for additional_result_fn in result_fn_1_N:
133 with contextlib.closing(gzip.open(additional_result_fn)) as r1_file:
134 r1 = cPickle.load(r1_file)
135 check_consistency(r0, r1)
136 merge_matches(matches, r1['matches'])
137 total_comparisons += r1['total comparisons']
138 if options.verbose:
139 print >> sys.stderr, "merged", additional_result_fn
141 r0['matches'] = matches
142 r0['total comparisons'] = total_comparisons
143 with contextlib.closing(gzip.open(output_fn, 'w')) as output_file:
144 cPickle.dump(r0, output_file, cPickle.HIGHEST_PROTOCOL)
145 if options.verbose:
146 print >> sys.stderr, "dumped", output_fn
149 if __name__ == '__main__':
150 main()