1 #!/usr/bin/env greylag-python
3 '''Merge a set of greylag search result files, producing a search result file
4 that summarizes the best matches found.
8 from __future__
import with_statement
11 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 from pprint
import pprint
40 print >> sys
.stderr
, 'warning:', s
42 sys
.exit('error: ' + s
)
45 def check_consistency(r0
, r1
):
46 """Warn on any inconsistencies between the two results that are about to
49 if r0
['version'] != r1
['version']:
50 warn("merging runs searched with different greylag versions!"
51 " (rerunning with same version strongly recommended)")
55 warn("mismatched keys: %s" % (k0 ^ k1
))
57 # FIX: this restriction goes away later
58 if r0
['spectrum files'] != r1
['spectrum files']:
59 error('merging across differing sets of ms2 files not yet implemented'
60 ' (%s vs %s)' % (r0
['spectrum files'], r1
['spectrum files']))
62 varying_keys
= set(['matches', 'total comparisons', 'argv'])
63 for k
in k0
- varying_keys
:
64 if r0
[k
] != r1
.get(k
):
65 warn("search context differs: %s (%s vs %s)" % (k
, r0
[k
], r1
[k
]))
68 def score_equal(s1
, s2
):
69 return abs(s1
-s2
) < 1e-6
72 def merge_match(m0
, m1
, keep
):
73 """Merge a particular spectrum match from m1 into m0."""
74 m0
[0]['comparisons'] += m1
[0]['comparisons']
76 # NB: This merge code, and score_equal above, must do the same merge as
77 # the merge code in cgreylag.cpp:evaluate_peptide!
80 matches
= m0
[1] + m1
[1]
81 matches
.sort(key
=lambda x
: x
['score'], reverse
=True)
83 merged_matches
= [matches
.pop()]
84 while len(merged_matches
) < keep
and matches
:
85 c0
, c1
= merged_matches
[-1], matches
.pop()
86 if (score_equal(c0
['score'], c1
['score'])
87 and c0
['peptide_sequence'] == c1
['peptide_sequence']
88 and c0
.get('mass_trace') == c1
.get('mass_trace')):
90 merged_matches
.append(c1
)
93 def merge_matches(m0
, m1
, keep
):
94 """Merge the match information in m1 into m0."""
95 for m1k
, m1v
in m1
.iteritems():
99 merge_match(m0
[m1k
], m1v
, keep
)
102 def main(args
=sys
.argv
[1:]):
103 parser
= optparse
.OptionParser(usage
=
104 "usage: %prog [options] [<result-file>...]"
105 " <output-result-file>",
106 description
=__doc__
, version
=__version__
)
107 pa
= parser
.add_option
108 pa("-f", "--files-on-stdin", action
="store_true", dest
="files_on_stdin",
109 help="read filenames to be merged from stdin, one per line, instead of"
110 " from the command-line.")
111 pa("-v", "--verbose", action
="store_true", dest
="verbose",
113 pa("--copyright", action
="store_true", dest
="copyright",
114 help="print copyright and exit")
115 (options
, args
) = parser
.parse_args(args
=args
)
117 if options
.copyright
:
121 if options
.files_on_stdin
:
125 filenames
= [ l
.strip() for l
in sys
.stdin
]
126 if len(filenames
) < 1:
127 error("--files-on-stdin given, but stdin was empty")
128 result_fn_0
= filenames
[0]
129 result_fn_1_N
= filenames
[1:]
134 result_fn_0
= args
[0]
135 result_fn_1_N
= args
[1:-1]
139 # spectrum name -> match list/spectrum info
141 with contextlib
.closing(open(result_fn_0
)) as r_file
:
142 r0
= cPickle
.load(r_file
)
143 total_comparisons
= r0
['total comparisons']
144 keep
= r0
['parameters']['best_result_count']
145 matches
= r0
['matches']
147 print >> sys
.stderr
, "loaded", result_fn_0
149 for additional_result_fn
in result_fn_1_N
:
150 with contextlib
.closing(open(additional_result_fn
)) as r1_file
:
151 r1
= cPickle
.load(r1_file
)
152 check_consistency(r0
, r1
)
153 total_comparisons
+= r1
['total comparisons']
154 keep
= max(keep
, r1
['parameters']['best_result_count'])
155 merge_matches(matches
, r1
['matches'], keep
)
157 print >> sys
.stderr
, "merged", additional_result_fn
159 del r1
# free some memory
161 r0
['matches'] = matches
162 r0
['total comparisons'] = total_comparisons
163 with contextlib
.closing(open(output_fn
, 'w')) as output_file
:
164 pk
= cPickle
.Pickler(output_file
, cPickle
.HIGHEST_PROTOCOL
)
165 pk
.fast
= 1 # stipulate no circular references
168 print >> sys
.stderr
, "dumped", output_fn
171 if __name__
== '__main__':