minor code reorg (use new Python if/else operator)
[greylag.git] / greylag_merge.py
blob94f52837a9b23fb397493159deff45a688c5f447
1 #!/usr/bin/env greylag-python
3 '''Merge a set of greylag search result files, producing a search result file
4 that summarizes the best matches found.
6 '''
8 from __future__ import with_statement
10 __copyright__ = '''
11 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 '''
28 __version__ = "0.0"
31 import contextlib
32 import cPickle
33 import optparse
34 import os.path
35 from pprint import pprint
36 import sys
39 def warn(s):
40 print >> sys.stderr, 'warning:', s
41 def error(s):
42 sys.exit('error: ' + s)
45 def check_consistency(r0, r1):
46 """Warn on any inconsistencies between the two results that are about to
47 be merged."""
49 if r0['version'] != r1['version']:
50 warn("merging runs searched with different greylag versions!"
51 " (rerunning with same version strongly recommended)")
52 k0 = set(r0.keys())
53 k1 = set(r1.keys())
54 if k0 != k1:
55 warn("mismatched keys: %s" % (k0 ^ k1))
57 # FIX: this restriction goes away later
58 if r0['spectrum files'] != r1['spectrum files']:
59 error('merging across differing sets of ms2 files not yet implemented'
60 ' (%s vs %s)' % (r0['spectrum files'], r1['spectrum files']))
62 varying_keys = set(['matches', 'total comparisons', 'argv'])
63 for k in k0 - varying_keys:
64 if r0[k] != r1.get(k):
65 warn("search context differs: %s (%s vs %s)" % (k, r0[k], r1[k]))
68 def score_equal(s1, s2):
69 return abs(s1-s2) < 1e-6
72 def merge_match(m0, m1, keep):
73 """Merge a particular spectrum match from m1 into m0."""
74 m0[0]['comparisons'] += m1[0]['comparisons']
76 # NB: This merge code, and score_equal above, must do the same merge as
77 # the merge code in cgreylag.cpp:evaluate_peptide!
79 assert keep >= 1
80 matches = m0[1] + m1[1]
81 matches.sort(key=lambda x: x['score'], reverse=True)
83 merged_matches = [matches.pop()]
84 while len(merged_matches) < keep and matches:
85 c0, c1 = merged_matches[-1], matches.pop()
86 if (score_equal(c0['score'], c1['score'])
87 and c0['peptide_sequence'] == c1['peptide_sequence']
88 and c0.get('mass_trace') == c1.get('mass_trace')):
89 continue
90 merged_matches.append(c1)
93 def merge_matches(m0, m1, keep):
94 """Merge the match information in m1 into m0."""
95 for m1k, m1v in m1.iteritems():
96 if m1k not in m0:
97 m0[m1k] = m1v
98 else:
99 merge_match(m0[m1k], m1v, keep)
102 def main(args=sys.argv[1:]):
103 parser = optparse.OptionParser(usage=
104 "usage: %prog [options] [<result-file>...]"
105 " <output-result-file>",
106 description=__doc__, version=__version__)
107 pa = parser.add_option
108 pa("-f", "--files-on-stdin", action="store_true", dest="files_on_stdin",
109 help="read filenames to be merged from stdin, one per line, instead of"
110 " from the command-line.")
111 pa("-v", "--verbose", action="store_true", dest="verbose",
112 help="be verbose")
113 pa("--copyright", action="store_true", dest="copyright",
114 help="print copyright and exit")
115 (options, args) = parser.parse_args(args=args)
117 if options.copyright:
118 print __copyright__
119 sys.exit(0)
121 if options.files_on_stdin:
122 if len(args) != 1:
123 parser.print_help()
124 sys.exit(1)
125 filenames = [ l.strip() for l in sys.stdin ]
126 if len(filenames) < 1:
127 error("--files-on-stdin given, but stdin was empty")
128 result_fn_0 = filenames[0]
129 result_fn_1_N = filenames[1:]
130 else:
131 if len(args) < 2:
132 parser.print_help()
133 sys.exit(1)
134 result_fn_0 = args[0]
135 result_fn_1_N = args[1:-1]
137 output_fn = args[-1]
139 # spectrum name -> match list/spectrum info
140 matches = {}
141 with contextlib.closing(open(result_fn_0)) as r_file:
142 r0 = cPickle.load(r_file)
143 total_comparisons = r0['total comparisons']
144 keep = r0['parameters']['best_result_count']
145 matches = r0['matches']
146 if options.verbose:
147 print >> sys.stderr, "loaded", result_fn_0
149 for additional_result_fn in result_fn_1_N:
150 with contextlib.closing(open(additional_result_fn)) as r1_file:
151 r1 = cPickle.load(r1_file)
152 check_consistency(r0, r1)
153 total_comparisons += r1['total comparisons']
154 keep = max(keep, r1['parameters']['best_result_count'])
155 merge_matches(matches, r1['matches'], keep)
156 if options.verbose:
157 print >> sys.stderr, "merged", additional_result_fn
159 del r1 # free some memory
161 r0['matches'] = matches
162 r0['total comparisons'] = total_comparisons
163 with contextlib.closing(open(output_fn, 'w')) as output_file:
164 pk = cPickle.Pickler(output_file, cPickle.HIGHEST_PROTOCOL)
165 pk.fast = 1 # stipulate no circular references
166 pk.dump(r0)
167 if options.verbose:
168 print >> sys.stderr, "dumped", output_fn
171 if __name__ == '__main__':
172 main()