# Extern declarations that make C stdio routines from "stdio.h" callable here.
# NOTE(review): the leading integers and the one-token-per-line layout look like
# extraction artifacts. The gaps in that numbering (2 -> 5, 6 -> 8, 10 -> 13)
# suggest some declarations are missing from this view; in particular no
# declaration of the FILE type is visible. Confirm against the original file.
2 cdef extern from "stdio.h":
# fopen: takes two char* arguments and returns a FILE*.
5 FILE
*fopen
(char *,char *)
# fdopen: takes an int and a char*, and returns a FILE*.
6 FILE
*fdopen
(int,char *)
# sscanf: variadic; takes a source string and a format string, returns int.
8 int sscanf
(char *str,char *fmt
,...)
# sprintf: variadic; takes a destination string and a format string, returns int.
9 int sprintf
(char *str,char *fmt
,...)
# fgets: takes a buffer, a size, and a FILE*; returns char*.
10 char *fgets
(char *str,int size
,FILE
*ifile
)
# Extern block for "ctype.h".
# NOTE(review): no declarations are visible under this header in this view;
# isprint and isspace are called later in the file and are presumably declared
# here. Confirm against the original file.
13 cdef extern from "ctype.h":
# Extern block for "string.h"; only strcpy is visible in this view.
17 cdef extern from "string.h":
# strcpy: takes two char* arguments and returns char*.
18 char *strcpy
(char *,char *)
# read_fasta_lengths(d, pyfile, filename)
#   d        -- mapping that receives d[id] = (seqLength, offset) for each record
#   pyfile   -- object whose fileno() is handed to fdopen() to obtain a C FILE*
#   filename -- used in error messages and to build the '<filename>.pureseq' path
# Raises IOError (see the two raise statements below).
# NOTE(review): the leading integers and the one-token-per-line layout look like
# extraction artifacts, and the inner numbering skips (29 -> 31, 33 -> 35,
# 35 -> 39, 45 -> 51, 51 -> 56). The conditions guarding the raise statements,
# the loop headers, and the updates of seqLength/ipos/offset are therefore not
# visible here; confirm every comment below against the original file.
22 def read_fasta_lengths
(d
, pyfile
, filename
):
23 'read seq lengths from python file object, save into dictionary d'
# Length/position bookkeeping, declared as C long long.
25 cdef long long seqLength
,ipos
,offset
# MUST USE 64-BIT INT!!!
# tmp: 32768-byte line buffer; fastastart: 4-byte buffer for the record-start
# marker; p: char pointer that receives the result of fgets().
26 cdef char tmp
[32768],fastastart
[4],*p
# C stream handles: ifile is the input stream; ifile2 is opened on the
# '.pureseq' output path below.
27 cdef FILE
*ifile
,*ifile2
29 ifile
= fdopen
(pyfile
.fileno
(),'r') # get FILE * from python file object
# NOTE(review): the condition guarding this raise is not visible in this view;
# presumably a NULL check on ifile -- confirm.
31 raise IOError('unable to open %s' % filename
)
# Output path is the input filename with '.pureseq' appended.
32 outfile
=filename
+'.pureseq'
33 ifile2
=fopen
(outfile
,'wb') # save in binary mode, though shouldn't matter
# NOTE(review): the condition guarding this raise is not visible in this view;
# presumably a NULL check on ifile2 -- confirm.
35 raise IOError('unable to create %s' % (filename
+'.pureseq'))
# Copy the '>' marker into fastastart so its first byte can be compared with
# the first byte of each line read.
39 strcpy
(fastastart
,'>')
# Reads at most 32767 bytes per call into the 32768-byte tmp buffer.
40 p
=fgets
(tmp
,32767,ifile
) # read the first line of the FASTA file
# A line whose first character equals fastastart[0] ('>') starts a new record.
42 if fastastart
[0]==p
[0]: #NEW SEQUENCE
# Before switching records, store the previous one, but only if an id has been
# seen and its length is positive.
43 if id is not None
and seqLength
>0:
44 d
[id]=seqLength
,offset
# SAVE THIS SEQ LENGTH
# New id = first whitespace-delimited token of the text after the leading
# marker character.
45 id=str(p
+1).split
()[0]
# Tests whether p[i] is printable and not whitespace.
# NOTE(review): what is done with characters that pass is not visible here.
51 if isprint
(p
[i
]) and not isspace
(p
[i
]):
56 p
=fgets
(tmp
,32767,ifile
) # read the next line of the FASTA file
# Same store-if-valid step as above.
# NOTE(review): presumably runs after the read loop ends, to save the final
# record -- the loop structure is not visible in this view.
57 if id is not None
and seqLength
>0:
58 d
[id]=seqLength
,offset
# SAVE THIS SEQ LENGTH