3 '''Generate a set of sqt files from the specified greylag search result
4 file. The sqt files will be created in the current directory, with names
5 corresponding to the searched ms2 files.
8 from __future__
import with_statement
11 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37 from pprint
import pprint
42 print >> sys
.stderr
, 'warning:', s
44 sys
.exit('error: ' + s
)
47 def print_header(f
, r
):
48 print >> f
, "H\tSQTGenerator\tgreylag"
49 print >> f
, "H\tSQTGeneratorVersion\t%s" % __version__
50 for db
in r
['databases']:
51 print >> f
, "H\tDatabase\t%s" % db
52 for pk
, pv
in r
['parameters'].items():
53 print >> f
, "H\tParameter\t%s\t%s" % (pk
, pv
)
54 # How should these be specified?
55 # What does DTASelect do with these?
56 # H StaticMod C=160.1388
60 def print_spectrum(f
, spectrum
, sp_best_matches
):
61 scan_low
, scan_high
, _rest
= spectrum
['name'].split('.', 2)
62 print >> f
, '\t'.join(str(v
) for v
in
63 ["S", scan_low
, scan_high
, spectrum
['charge'], 0,
64 'honk', spectrum
['mass'],
65 round(math
.log(spectrum
['total_ion_current']), 4),
66 0, spectrum
['comparisons']])
68 best_scores
= sorted(list(set(m
['score'] for m
in sp_best_matches
)))
70 rank_map
= dict(zip(best_scores
, range(1,len(best_scores
)+1)))
72 best_score
= sp_best_matches
[0]['score']
73 prev_score
, prev_sequence
= None, None
74 for match
in sp_best_matches
:
75 score
= match
['score']
78 rank
= rank_map
[score
]
79 score_delta
= (best_score
- score
) / best_score
81 # This is a temporary way of stripping duplicates. It only works if
82 # they are adjacent (which will only rarely be false?).
83 if score
!= prev_score
or match
['peptide_sequence'] != prev_sequence
:
84 # Note: scores, being log probability, are non-positive, but we
85 # flip the sign in the SQT output
86 print >> f
, '\t'.join(str(v
) for v
in
88 round(match
['predicted_parent_mass'], 5),
89 round(score_delta
, 4), round(-score
, 4), 0,
90 # 1 of 2 ions found--keep DTASelect happy
92 "-.%s.-" % match
['peptide_sequence'],
94 prev_score
, prev_sequence
= score
, match
['peptide_sequence']
96 print >> f
, 'L\t%s\t%s' % (match
['sequence_name'],
97 match
['peptide_begin'])
99 def main(args
=sys
.argv
[1:]):
100 parser
= optparse
.OptionParser(usage
=
101 "usage: %prog [options] <result-file>",
102 description
=__doc__
, version
=__version__
)
103 pa
= parser
.add_option
104 pa("-d", "--output-directory", dest
="output_directory",
105 help="directory where output files are written [default is '.']",
107 pa("-v", "--verbose", action
="store_true", dest
="verbose",
109 pa("--dump", action
="store_true", dest
="dump",
110 help="dump the result file (for debugging)")
111 pa("--copyright", action
="store_true", dest
="copyright",
112 help="print copyright and exit")
113 (options
, args
) = parser
.parse_args(args
=args
)
115 if options
.copyright
:
123 with contextlib
.closing(gzip
.open(args
[0])) as r_file
:
124 r
= cPickle
.load(r_file
)
130 spectrum_fns
= r
['spectrum files']
131 assert len(spectrum_fns
) == len(set(spectrum_fns
)) # check uniqueness
133 spectra
= r
['spectra']
134 best_matches
= r
['matches']['best_matches']
135 assert len(spectra
) == len(best_matches
)
137 # order spectra and best_matches by (filename, spectrum name)
138 def filename_specname_order(sm
):
139 return r
['spectrum files'][sm
[0]['file_id']], sm
[0]['name']
141 spec_match
= zip(spectra
, best_matches
)
142 spec_match
.sort(key
=filename_specname_order
)
144 for spectrum_n
, spectrum_fn
in enumerate(spectrum_fns
):
145 assert os
.path
.dirname(spectrum_fn
) == ''
146 sqt_fn
= os
.path
.splitext(spectrum_fn
)[0] + '.sqt'
147 with
open(sqt_fn
, 'w') as sqtf
:
148 print_header(sqtf
, r
)
149 for spectrum
, best_match
in spec_match
:
150 if spectrum
['file_id'] == spectrum_n
:
151 print_spectrum(sqtf
, spectrum
, best_match
)
154 if __name__
== '__main__':