1 #!/usr/bin/env greylag-python
3 '''Create a trivial index giving the starting point of each spectrum, as a
4 byte offset from the file beginning. (The index is stored in Python pickle
5 format, compressed with gzip.) Also checks that spectra names are unique
6 and that spectra are ordered by name, which other greylag programs assume.
9 from __future__
import with_statement
12 greylag, Copyright (C) 2006-2007, Stowers Institute for Medical Research
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 2 of the License, or
17 (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
24 You should have received a copy of the GNU General Public License along
25 with this program; if not, write to the Free Software Foundation, Inc.,
26 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
42 sys
.exit('error: ' + s
)
45 def main(args
=sys
.argv
[1:]):
46 parser
= optparse
.OptionParser(usage
=
47 "usage: %prog [options] <ms2-file>...",
49 pa
= parser
.add_option
50 pa("--copyright", action
="store_true", dest
="copyright",
51 help="print copyright and exit")
52 pa("--version", action
="store_true", dest
="version",
53 help="print version and exit")
54 (options
, args
) = parser
.parse_args(args
=args
)
64 or any(True for f
in args
if not f
.endswith('.ms2'))):
# Slurp the whole ms2 file.  It is read in binary mode because the index
# built from `contents` stores byte offsets from the beginning of the file:
# text mode may translate line endings on some platforms, which would make
# the recorded offsets disagree with the on-disk positions and file size.
with open(fn, 'rb') as specfile:
    contents = specfile.read()
# Locate every spectrum header line (a line starting with ':').  Each match
# supplies both the spectrum name (the full line) and its start offset.
matches = list(re.finditer('^:.*$', contents, re.MULTILINE))
specnames = [m.group() for m in matches]

# Validate *before* creating the index file.  error() is expected to
# terminate the program (via sys.exit); if the index were already open at
# that point, an empty or clobbered '.idx' would be left on disk.
if len(set(specnames)) < len(specnames):
    error("duplicate spectrum names not allowed")
if specnames != sorted(specnames):
    error("spectra must be ordered by name")

# The index is a dict holding the start offset of each spectrum plus the
# size of the indexed file as reported by the filesystem.
index = {
    'offsets': [m.start() for m in matches],
    'file size': os.path.getsize(fn),
}

# Write the gzip-compressed pickle only once the payload is complete.
# contextlib.closing guarantees the gzip stream is closed on exit.
with contextlib.closing(gzip.open(fn + '.idx', 'w')) as idx:
    cPickle.dump(index, idx, cPickle.HIGHEST_PROTOCOL)
87 if __name__
== '__main__':