1 #!/usr/bin/env greylag-python
3 '''Merge a set of greylag search result files, producing a search result file
4 that summarizes the best matches found.
8 from __future__
import with_statement
11 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
36 from pprint
import pprint
41 print >> sys
.stderr
, 'warning:', s
43 sys
.exit('error: ' + s
)
46 def check_consistency(r0
, r1
):
47 """Warn on any inconsistencies between the two results that are about to
50 if r0
['version'] != r1
['version']:
51 warn("merging runs searched with different greylag versions!"
52 " (rerunning with same version strongly recommended)")
56 warn("mismatched keys: %s" % (k0 ^ k1
))
58 varying_keys
= set(['matches', 'total comparisons', 'argv'])
59 for k
in k0
- varying_keys
:
60 if r0
[k
] != r1
.get(k
):
61 warn("search context differs: %s (%s vs %s)" % (k
, r0
[k
], r1
[k
]))
64 def score_equal(s1
, s2
):
65 return abs(s1
-s2
) < 1e-6
68 def merge_match(m0
, m1
):
69 """Merge a particular spectrum match from m1 into m0."""
70 m0
[0]['comparisons'] += m1
[0]['comparisons']
72 # NB: This merge code, and score_equal above, must functionally match the
73 # merge code in cgreylag.cpp:evaluate_peptide!
75 keep
= max(len(m0
), len(m1
))
77 matches
= m0
[1] + m1
[1]
78 matches
.sort(key
=lambda x
: x
['score'], reverse
=True)
80 merged_matches
= [matches
.pop()]
81 while len(merged_matches
) < keep
and matches
:
82 c0
, c1
= merged_matches
[-1], matches
.pop()
83 if (score_equal(c0
['score'], c1
['score'])
84 and c0
['peptide_sequence'] == c1
['peptide_sequence']
85 and c0
['mass_trace'] == c1
['mass_trace']):
87 merged_matches
.append(c1
)
90 def merge_matches(m0
, m1
):
91 """Merge the match information in m1 into m0."""
92 for m1k
, m1v
in m1
.iteritems():
96 merge_match(m0
[m1k
], m1v
)
99 def main(args
=sys
.argv
[1:]):
100 parser
= optparse
.OptionParser(usage
=
101 "usage: %prog [options] [<result-file>...]"
102 " <output-result-file>",
103 description
=__doc__
, version
=__version__
)
104 pa
= parser
.add_option
105 pa("-f", "--files-on-stdin", action
="store_true", dest
="files_on_stdin",
106 help="read filenames to be merged from stdin, one per line, instead of"
107 " from the command-line.")
108 pa("-v", "--verbose", action
="store_true", dest
="verbose",
110 pa("--copyright", action
="store_true", dest
="copyright",
111 help="print copyright and exit")
112 (options
, args
) = parser
.parse_args(args
=args
)
114 if options
.copyright
:
118 if options
.files_on_stdin
:
122 filenames
= [ l
.strip() for l
in sys
.stdin
]
123 if len(filenames
) < 1:
124 error("--files-on-stdin given, but stdin was empty")
125 result_fn_0
= filenames
[0]
126 result_fn_1_N
= filenames
[1:]
131 result_fn_0
= args
[0]
132 result_fn_1_N
= args
[1:-1]
136 # spectrum name -> match list/spectrum info
138 with contextlib
.closing(gzip
.open(result_fn_0
)) as r_file
:
139 r0
= cPickle
.load(r_file
)
140 matches
= r0
['matches']
141 total_comparisons
= r0
['total comparisons']
143 print >> sys
.stderr
, "loaded", result_fn_0
145 for additional_result_fn
in result_fn_1_N
:
146 with contextlib
.closing(gzip
.open(additional_result_fn
)) as r1_file
:
147 r1
= cPickle
.load(r1_file
)
148 check_consistency(r0
, r1
)
149 merge_matches(matches
, r1
['matches'])
150 total_comparisons
+= r1
['total comparisons']
152 print >> sys
.stderr
, "merged", additional_result_fn
154 r0
['matches'] = matches
155 r0
['total comparisons'] = total_comparisons
156 with contextlib
.closing(gzip
.open(output_fn
, 'w')) as output_file
:
157 cPickle
.dump(r0
, output_file
, cPickle
.HIGHEST_PROTOCOL
)
159 print >> sys
.stderr
, "dumped", output_fn
162 if __name__
== '__main__':