4 Given a set of sqt files, determine a 'valid' set of identifications that
5 satisfy the specified FDR (false discovery rate) and other specified criteria.
6 Optionally rewrite the sqt files with invalid spectra marked 'N' or deleted
9 Note that a peptide end with an invalid flanking residue (currently any of
10 BJOUXZ) is not considered tryptic if it's an N-terminal end and is considered
11 tryptic if it's a C-terminal end and otherwise qualifies. (Flanking '-' is
12 always considered tryptic.)
16 from __future__
import with_statement
19 greylag, a collection of programs for MS/MS protein analysis
20 Copyright (C) 2006-2008 Stowers Institute for Medical Research
22 This program is free software: you can redistribute it and/or modify
23 it under the terms of the GNU General Public License as published by
24 the Free Software Foundation, either version 3 of the License, or
25 (at your option) any later version.
27 This program is distributed in the hope that it will be useful,
28 but WITHOUT ANY WARRANTY; without even the implied warranty of
29 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 GNU General Public License for more details.
32 You should have received a copy of the GNU General Public License
33 along with this program. If not, see <http://www.gnu.org/licenses/>.
36 Stowers Institute for Medical Research
38 Kansas City, Missouri 64110
43 from collections
import defaultdict
47 from pprint
import pprint
54 # allow this script to be run even if not "installed"
56 from greylag
import VERSION
62 print >> sys
.stderr
, 'warning:', s
64 sys
.exit('error: ' + s
)
67 def generate_spectra_from_files(sqt_filenames
):
68 """Yield tuples (filename, H+_lines, S_line, [(M_line, mod_lines, L_lines),
69 ...]) for the given SQT files. H+_lines, for example, is a list
70 containing H lines and possibly other kinds of lines up to the next S
71 line. (mod_lines are non-L lines following an M-line.)
73 for sqt_filename
in sqt_filenames
:
74 with
open(sqt_filename
) as sqtf
:
75 sqt_filename
= os
.path
.splitext(sqt_filename
)[0]
77 S_line
, ML_lines
= None, []
79 if line
.startswith('L\t'):
80 ML_lines
[-1][2].append(line
)
81 elif line
.startswith('M\t'):
82 ML_lines
.append((line
, [], []))
83 elif line
.startswith('S\t'):
85 yield (sqt_filename
, H_lines
, S_line
, ML_lines
)
86 S_line
, ML_lines
= None, []
90 # read headers up to first S line
93 # other kind of line following M line
94 ML_lines
[-1][1].append(line
)
96 yield (sqt_filename
, H_lines
, S_line
, ML_lines
)
99 def write_spectra_to_files(spectra
):
100 """Given a generator of tuples of the form generated by
101 'generate_spectra_from_files', write them to the implied files. If any of
102 those files already exist, they are renamed with a '.bak' suffix. Temp
103 files are written first, then renamed at the end.
106 tmp_suffix
= '.tmp.%s' % time
.time()
111 for sqt_filename
, H_lines
, S_line
, ML_lines
in spectra
:
112 sqtf
= filemap
.get(sqt_filename
)
114 sqtf
= filemap
[sqt_filename
] = open(sqt_filename
+ tmp_suffix
, 'w')
118 for m_line
, mod_lines
, l_lines
in ML_lines
:
125 for fd
in filemap
.values():
128 if os
.path
.exists(fn
):
129 os
.rename(fn
, fn
+ '.bak')
130 os
.rename(fn
+ tmp_suffix
, fn
)
133 def reset_marks(spectra
):
134 """Given a generator of tuples of the form generated by
135 'generate_spectra_from_files', yield them with the M line marks
136 (destructively) reset to U.
139 def reset_mark(m_line
):
140 return re
.sub(r
'\S(\s*)$', r
'U\1', m_line
, 1)
142 for sqt_filename
, H_lines
, S_line
, ML_lines
in spectra
:
143 ML_lines_1
= [ (reset_mark(M_line
), mod_lines
, L_lines
)
144 for M_line
, mod_lines
, L_lines
in ML_lines
]
145 yield (sqt_filename
, H_lines
, S_line
, ML_lines_1
)
148 def set_marks(options
, valid_spectrum_info
, spectra
, kill
=False):
149 """Given a generator of tuples 'spectra' of the form generated by
150 'generate_spectra_from_files', yield them with the M line marks
151 (destructively) set to 'N', for spectra not listed in
152 valid_spectrum_info. If 'kill', omit marked spectra altogether.
155 def set_mark(m_line
):
156 return re
.sub(r
'\S(\s*)$', r
'N\1', m_line
, 1)
158 passing_spectrum_numbers
= frozenset(si
[0] for si
in valid_spectrum_info
)
160 for spectrum_no
, (sqt_filename
, H_lines
, S_line
, ML_lines
) \
161 in enumerate(spectra
):
162 mark_spectrum
= spectrum_no
not in passing_spectrum_numbers
163 if mark_spectrum
and not kill
:
164 ML_lines
= [ (set_mark(M_line
), mod_lines
, L_lines
)
165 for M_line
, mod_lines
, L_lines
in ML_lines
]
166 if not (kill
and mark_spectrum
):
167 yield (sqt_filename
, H_lines
, S_line
, ML_lines
)
170 nulltrans
= string
.maketrans('','')
171 non_aa_chars
= string
.digits
+ string
.punctuation
174 """Given a peptide, return just the unmodified peptide seen."""
175 # assuming no control chars present
176 return s
.translate(nulltrans
, non_aa_chars
)
179 def get_spectrum_info(decoy_prefix
, minimum_trypticity
, spectra
):
180 """Return a pair, the first a dict mapping each charge to a list of
181 (score, delta, state), where state is 'real' or 'decoy', for all the
182 spectra in sqt_fns, and the second a list of (charge, score, delta), one
183 for each spectrum, given a generator of tuples 'spectra' of the form
184 generated by 'generate_spectra_from_files'. Spectra lacking
185 minimum_trypticity are dropped.
188 ## charge -> [ (score, delta, state), ... ]
189 ## where state is either 'real' or 'decoy'
190 #z_scores = defaultdict(list)
192 ## information about spectra, in file order--None iff spectrum was filtered
194 ## [ (charge, score, delta) or None, ... ]
197 # [ (spectrum_no, charge, score, delta, state, stripped_peptide), ... ]
200 for spectrum_no
, (sqt_filename
,
201 H_lines
, S_line
, ML_lines
) in enumerate(spectra
):
202 S_fields
= S_line
.split('\t')
203 current_charge
= int(S_fields
[3])
204 actual_mass
= float(S_fields
[6])
205 spectrum_name
= '.'.join([sqt_filename
] + S_fields
[1:4])
209 current_peptide_trypticity
= None
210 current_state
= set()
212 highest_rank_seen
= 0
213 best_peptide_seen
= None
214 current_peptide
= None
216 for M_line
, mod_lines
, L_lines
in ML_lines
:
217 M_fields
= M_line
.split('\t')
219 rank
= int(M_fields
[1])
220 predicted_mass
= float(M_fields
[3])
221 score
= float(M_fields
[5])
222 peptide
= M_fields
[9].strip().upper() # e.g., A.B@CD*.-
223 #delta = float(M_fields[4])
224 #sp_rank = int(M_fields[2])
225 assert score
>= 0 and rank
>= 1
227 if rank
> highest_rank_seen
+ 1:
228 # ignore aux top SpRank hits, because delta confounds
230 highest_rank_seen
= rank
232 if current_score
== None:
234 break # spectrum is total crap
235 current_score
= score
237 # Don't trust delta from input file.
238 delta
= (current_score
- score
) / current_score
240 assert peptide
[1] == '.' and peptide
[-2] == '.'
241 peptide_flanks
= (peptide
[0], peptide
[-1]) # ('A', '-')
242 peptide
= peptide
[2:-2] # 'B@CD*'
244 if current_peptide
== None:
245 current_peptide
= peptide
246 current_peptide_trypticity
= 0
247 # NB: K*, R* not tryptic!
248 if (peptide_flanks
[0] in ('K', 'R') and peptide
[0] != 'P'
249 or peptide_flanks
[0] == '-'):
250 current_peptide_trypticity
+= 1
251 if (peptide
[-1] in ('K', 'R') and peptide_flanks
[1] != 'P'
252 or peptide_flanks
[1] == '-'):
253 current_peptide_trypticity
+= 1
255 # Choose delta from first M line such that peptide differs from
256 # peptide in initial (top) M line. We consider differently
257 # modified versions of the same peptide to be different (at least
259 if peptide
!= current_peptide
:
260 current_delta
= delta
263 for L_line
in L_lines
:
264 locus
= L_line
.split('\t')[1]
265 current_loci
.add(locus
)
266 if locus
.startswith(decoy_prefix
):
267 current_state
.add('decoy')
269 current_state
.add('real')
271 if (current_score
!= None and current_delta
!= None
272 and current_peptide_trypticity
>= minimum_trypticity
):
274 if len(current_state
) == 2:
275 current_state
= 'both'
277 assert len(current_state
) == 1
278 current_state
= current_state
.pop()
280 spectrum_info
.append((spectrum_no
, spectrum_name
, current_charge
,
281 current_score
, current_delta
, current_state
,
283 strip_mods(current_peptide
),
284 actual_mass
, actual_mass
-predicted_mass
,
285 frozenset(current_loci
)))
291 # - repeat, adjusting fdr upwards (or downwards?) to meet user goal
292 # repeat until set of valid spectra converges:
293 # - do pr filter of spectra
294 # - if not first time, and no spectra filtered out, break
295 # - separately for each charge, choose valid spectra to maximize real ids subject to FDR
296 # - saturate (locally or completely)
299 def PPV(reals
, decoys
):
300 """Returns the estimated Positive Predictive Value (== 1 - FDR), given
301 counts of reals and decoys."""
303 # We know that the decoys are false positives, and we estimate that an
304 # equal number of the "reals" are actually false, too.
305 false_positives
= 2*decoys
306 true_positives
= reals
- decoys
308 if true_positives
<= 0:
310 return float(true_positives
) / (true_positives
+ false_positives
)
313 def calculate_inner_threshold(fdr
, charge
, spinfo
):
316 spinfo
= sorted(spinfo
, key
=lambda x
: x
[3])
320 real_count
= sum(1 for x
in spinfo
if x
[5] == 'real')
321 decoy_count
= sum(1 for x
in spinfo
if x
[5] == 'decoy')
323 current_threshold
= -1e100
# allow all spectra
324 for n
, sp
in enumerate(spinfo
):
326 return (None, real_count
, decoy_count
) # give up
328 ppv_est
= PPV(real_count
, decoy_count
)
329 if ppv_est
>= ppv_goal
:
333 elif sp
[5] == 'decoy':
335 # set threshold just high enough to exclude this spectrum
336 current_threshold
= sp
[3] + epsilon
338 current_threshold
= spinfo
[-1][3] + epsilon
# couldn't meet goal
340 return (current_threshold
, real_count
, decoy_count
)
343 def calculate_combined_thresholds(fdr
, options
, spectrum_info
):
344 """Find best score/delta thresholds for each charge."""
348 # Rather than search every possible value of delta, we're only going to
349 # "sample" at this granularity. This cuts search time dramatically (and
350 # makes it O(n) instead of O(n**2). Extra precision wouldn't really be
351 # useful in any case.
352 SEARCH_GRANULARITY
= 0.001
354 # charge -> (score, delta, passing_reals, passing_decoys)
357 by_charge
= lambda x
: x
[2]
358 spectrum_info
.sort(key
=by_charge
)
359 for charge
, spinfo
in itertools
.groupby(spectrum_info
, key
=by_charge
):
360 spinfo0
= sorted(spinfo
, key
=lambda x
: x
[4], reverse
=True)
365 this_value
= spinfo0
[-1][4] # current delta
366 if (last_value
== None
367 or abs(this_value
- last_value
) >= SEARCH_GRANULARITY
):
370 r
= calculate_inner_threshold(fdr
, charge
, spinfo0
)
371 threshold
, real_count
, decoy_count
= r
372 if threshold
!= None:
374 print '#', charge
, threshold
, this_value
, \
375 real_count
, decoy_count
376 if (charge
not in thresholds
377 or real_count
> thresholds
[charge
][2]):
378 thresholds
[charge
] = (threshold
, this_value
,
379 real_count
, decoy_count
)
380 last_value
= this_value
386 def flatten(gen_of_gens
):
387 for gen
in gen_of_gens
:
392 def redundancy_filter(options
, spectrum_info
):
393 if options
.minimum_spectra_per_locus
< 2:
394 return spectrum_info
[:]
396 # si = (spectrum_no, spectrum_name, charge, score, delta, state,
397 # peptide, stripped_peptide, actual_mass, mass_delta, loci)
399 Rs
= sorted(flatten(((locus
, si
) for locus
in si
[10])
400 for si
in spectrum_info
))
403 for locus
, locus_Rs
in itertools
.groupby(Rs
, key
=lambda x
: x
[0]):
404 list_locus_si
= [ si
for locus
, si
in locus_Rs
]
405 if len(list_locus_si
) < options
.minimum_spectra_per_locus
:
407 result |
= set(list_locus_si
)
408 return sorted(result
)
411 def saturate_by_prefix_suffix(spectrum_info
, saturation_spectrum_info
):
412 PREFIX_LENGTH
= 16 # empirically seems good
413 peptide_prefixes
= set(si
[7][:PREFIX_LENGTH
] for si
in spectrum_info
)
414 sat_si
= sorted(ssi
for ssi
in saturation_spectrum_info
415 if ssi
[7][:PREFIX_LENGTH
] in peptide_prefixes
)
417 print ("saturation by prefix (w/%s): %s -> %s"
418 % (len(saturation_spectrum_info
), len(spectrum_info
), len(sat_si
)))
422 def saturate(spectrum_info
, saturation_spectrum_info
):
423 peptides
= set(si
[7] for si
in spectrum_info
)
424 # FIX: does this actually need to be sorted?
425 sat_si
= sorted(ssi
for ssi
in saturation_spectrum_info
426 if ssi
[7] in peptides
)
427 print ("saturation (w/%s): %s -> %s"
428 % (len(saturation_spectrum_info
), len(spectrum_info
), len(sat_si
)))
429 return saturate_by_prefix_suffix(sat_si
, saturation_spectrum_info
)
434 print len(si
), sum(1 for x
in si
if x
[5] == 'real'), sum(1 for x
in si
if x
[5] == 'decoy')
437 def try_fdr(fdr_guess
, options
, remaining_spectrum_info
, saturation_spectrum_info
=None):
438 print "try_fdr:", fdr_guess
439 debug_fdr(remaining_spectrum_info
)
441 remaining_spectrum_info_1
= None
443 thresholds
= calculate_combined_thresholds(fdr_guess
, options
,
444 remaining_spectrum_info
)
445 remaining_spectrum_info
= [ si
for si
in remaining_spectrum_info
446 if (si
[2] in thresholds
447 and si
[3] >= thresholds
[si
[2]][0]
448 and si
[4] >= thresholds
[si
[2]][1]) ]
449 debug_fdr(remaining_spectrum_info
)
450 if remaining_spectrum_info
== remaining_spectrum_info_1
:
452 remaining_spectrum_info_1
= redundancy_filter(options
,
453 remaining_spectrum_info
)
454 debug_fdr(remaining_spectrum_info_1
)
455 if len(remaining_spectrum_info_1
) == len(remaining_spectrum_info
):
457 remaining_spectrum_info
= remaining_spectrum_info_1
459 if not options
.no_saturation
and saturation_spectrum_info
:
460 remaining_spectrum_info_1
= saturate(remaining_spectrum_info_1
,
461 saturation_spectrum_info
)
462 debug_fdr(remaining_spectrum_info_1
)
464 total_reals
= sum(1 for si
in remaining_spectrum_info_1
466 total_decoys
= sum(1 for si
in remaining_spectrum_info_1
468 #total_reals = sum(thresholds[charge][2] for charge in thresholds)
469 #total_decoys = sum(thresholds[charge][3] for charge in thresholds)
470 fdr_result
= 1 - PPV(total_reals
, total_decoys
)
472 return (fdr_result
, thresholds
, total_reals
, total_decoys
,
473 remaining_spectrum_info_1
)
476 # FIX: at various points here we may have a goal of a minimum number of real
477 # ids, based on goal FDR or whatever. We can pass this inward to do
478 # branch-and-bound, which may save time.
480 # FIX: this is now somewhat slower than before--any way to speed it up?
482 def search_adjusting_fdr(options
, spectrum_info_0
):
483 """Repeatedly calculate thresholds while adjusting FDR, looking for an FDR
484 guess that results in an appropriate goal FDR. This might be needed
485 because the thresholds found for per-charge FDRs will be saturated or have
486 quantization errors, resulting in overly conservative thresholds. Returns
487 a list of items from spectrum_info that are considered "valid".
490 # FIX: Does this generate enough additional real ids to be worth the
491 # computational cost?
492 #FDR_INFLATION_FACTOR = 1.1
493 FDR_INFLATION_FACTOR
= 3
494 assert FDR_INFLATION_FACTOR
> 1
496 spectrum_info
= redundancy_filter(options
, spectrum_info_0
)
498 low_fdr
= options
.fdr
499 # FIX: spectrum_info_0 has already been filtered for tryptic status
500 # Should we use an unfiltered set instead?
501 low_results
= try_fdr(low_fdr
, options
, spectrum_info
, spectrum_info_0
)
503 if options
.adjust_fdr
:
504 high_fdr
= min(1.0, options
.fdr
* FDR_INFLATION_FACTOR
)
505 high_results
= try_fdr(high_fdr
, options
, spectrum_info
,
507 initial_total_reals
= low_results
[2]
511 print ("adjust infl fdr %s %s -> %s"
512 % (low_fdr
, high_fdr
, high_results
[0]))
513 if high_results
[0] >= options
.fdr
:
515 low_fdr
, low_results
= high_fdr
, high_results
516 high_fdr
= min(1.0, high_fdr
* FDR_INFLATION_FACTOR
)
519 high_results
= try_fdr(high_fdr
, options
, spectrum_info
,
522 warn("FDR adjustment inflation failed")
529 CONVERGENCE_FACTOR
= 0.01
530 assert CONVERGENCE_FACTOR
> 0
532 assert high_fdr
>= low_fdr
533 if (high_fdr
- low_fdr
) <= CONVERGENCE_FACTOR
*options
.fdr
:
536 # - seems to be slower than the naive method?
537 # Bias is empirically determined. It's supposed to speed
538 # convergence, but should not affect the results.
539 # FIX: better method possible?
541 #prop = ((options.fdr - low_results[0])
542 # / (high_results[0] - low_results[0]))
543 #prop = min(1, max(0, prop))
544 #guess_fdr = low_fdr + prop**GUESS_BIAS * (high_fdr - low_fdr)
547 guess_fdr
= (high_fdr
+ low_fdr
) / 2.0
549 guess_results
= try_fdr(guess_fdr
, options
, spectrum_info
,
552 print ("adjust fdr %s %s %s -> %s"
553 % (low_fdr
, guess_fdr
, high_fdr
, guess_results
[0]))
554 if guess_results
[0] > options
.fdr
:
555 high_fdr
, high_results
= guess_fdr
, guess_results
557 low_fdr
, low_results
= guess_fdr
, guess_results
559 warn("FDR adjustment convergence failed")
561 (fdr_result
, thresholds
, total_reals
,
562 total_decoys
, valid_spectrum_info
) = low_results
564 if options
.output_peptides
:
565 valid_spectrum_info
.sort(key
=lambda x
: x
[1])
566 for (spectrum_no
, spectrum_name
, charge
, score
, delta
, state
,
567 peptide
, stripped_peptide
, actual_mass
,
568 mass_delta
, loci
) in valid_spectrum_info
:
569 print >> options
.output_peptides
, \
570 ("%+d %s %s %s %.8f %.8f"
571 % (charge
, spectrum_name
, state
, peptide
, actual_mass
,
574 for charge
in sorted(thresholds
.keys()):
575 score_threshold
, delta_threshold
= thresholds
[charge
][0:2]
576 reals
, decoys
= thresholds
[charge
][2:4]
577 print ("%+d: score %f, delta %.6f -> %s real ids, %s decoys"
579 % (charge
, score_threshold
, delta_threshold
,
580 reals
, decoys
, 0.5 * (1 - PPV(reals
, decoys
))))
581 print ("# total: %s real ids, %s decoys (fdr %.4f)"
582 % (total_reals
, total_decoys
, fdr_result
/ 2.0))
583 if options
.adjust_fdr
:
584 print ("# (FDR adjustment found %s more real ids)"
585 % (total_reals
- initial_total_reals
))
587 return valid_spectrum_info
590 def main(args
=sys
.argv
[1:]):
591 parser
= optparse
.OptionParser(usage
=
592 "usage: %prog [options] <sqt-file>...",
593 description
=__doc__
, version
=VERSION
)
594 pa
= parser
.add_option
595 DEFAULT_DECOY_PREFIX
= "SHUFFLED_"
596 pa("--decoy-prefix", dest
="decoy_prefix", default
=DEFAULT_DECOY_PREFIX
,
597 help='prefix given to locus name of decoy (e.g., shuffled) database'
598 ' sequences [default=%r]' % DEFAULT_DECOY_PREFIX
, metavar
="PREFIX")
600 pa("--fdr", dest
="fdr", type="float", default
=DEFAULT_FDR
,
601 help="false discovery rate [default=%s] (Note that this is the"
602 " final resulting FDR after the decoys have been removed" % DEFAULT_FDR
,
603 metavar
="PROPORTION")
604 pa("--adjust-fdr", action
="store_true", dest
="adjust_fdr",
605 help="try to adjust internal FDR to achieve a final FDR closer to"
606 " requested FDR (this takes longer, but may find more read ids in some"
608 pa("--no-saturation", action
="store_true", dest
="no_saturation",
609 help="skip peptide saturation")
610 pa("-v", "--verbose", action
="store_true", dest
="verbose",
612 pa("--output-peptides", dest
="output_peptides",
613 help="output information about passing peptides to specified file"
614 " ('-' for stdout)", metavar
="FILENAME")
615 pa("-t", "--minimum-trypticity", type="choice", choices
=('0', '1', '2'),
616 default
='0', dest
="minimum_trypticity",
617 help="drop peptides with too few tryptic ends"
618 " (2=fully tryptic, 1=semi-tryptic, 0=any)", metavar
="TRYPTICITY")
619 DEFAULT_MINIMUM_SPECTRA_PER_LOCUS
= 1
620 pa("-p", "--minimum-spectra-per-locus", dest
="minimum_spectra_per_locus",
621 type="int", default
=DEFAULT_MINIMUM_SPECTRA_PER_LOCUS
,
622 help="only loci with at least this many spectra are included, and"
623 " spectra not associated with such a locus are excluded"
624 " [default=%s]" % DEFAULT_MINIMUM_SPECTRA_PER_LOCUS
,
626 #pa("--graph", dest="graph",
627 # help='create distribution graphs, using the specified prefix',
628 # metavar="PATH PREFIX")
629 pa("--debug", action
="store_true", dest
="debug",
630 help="show debug output")
631 pa("--mark", action
="store_true", dest
="mark",
632 help="rewrite the input files, changing some validation marks to 'N',"
633 " according to filtering")
634 pa("--reset-marks", action
="store_true", dest
="reset_marks",
635 help="rewrite the input files, changing all validation marks to 'U'")
636 pa("--kill", action
="store_true", dest
="kill",
637 help="rewrite the input files, removing spectra that don't pass"
638 " validation, according to filtering")
639 pa("--copyright", action
="store_true", dest
="copyright",
640 help="print copyright and exit")
641 (options
, args
) = parser
.parse_args(args
=args
)
643 if options
.copyright
:
647 if len(args
) < 1 or options
.minimum_spectra_per_locus
< 0:
651 if not (0.0 <= options
.fdr
< 0.5):
652 error("--fdr must be within range [0.0, 0.5)")
653 if (sum(1 for x
in (options
.mark
, options
.reset_marks
, options
.kill
) if x
)
655 error("only one of --mark, --reset-marks, --kill may be specified")
657 if options
.reset_marks
:
658 write_spectra_to_files(reset_marks(generate_spectra_from_files(args
)))
661 if options
.output_peptides
:
662 if options
.output_peptides
== '-':
663 options
.output_peptides
= sys
.stdout
665 options
.output_peptides
= open(options
.output_peptides
, 'w')
667 # This is to translate the "effective" FDR, which is the rate after decoys
668 # have been stripped out of the results (which is what users really care
669 # about), into our internal FDR, which includes decoys.
672 spectrum_info
= get_spectrum_info(options
.decoy_prefix
,
673 int(options
.minimum_trypticity
),
674 generate_spectra_from_files(args
))
677 print ('%s passing spectra, of which %s are not decoys'
678 % (len(spectrum_info
),
679 sum(1 for si
in spectrum_info
if si
[5] == 'real')))
681 # valid_spectrum_info is the list of spectrum_info elements that have been
683 valid_spectrum_info
= search_adjusting_fdr(options
, spectrum_info
)
686 valid_spectrum_info
.sort()
687 for (spectrum_no
, spectrum_name
, charge
, score
, delta
, state
,
688 peptide
, stripped_peptide
, actual_mass
,
689 mass_delta
, loci
) in valid_spectrum_info
:
690 print '#S', charge
, score
, delta
692 if options
.mark
or options
.kill
:
693 # Note that we are intentionally reading the input files again here,
694 # to avoid having to keep them all in memory.
695 write_spectra_to_files(set_marks(options
, valid_spectrum_info
,
696 generate_spectra_from_files(args
),
700 if __name__
== '__main__':