added iteration test to graph_test.py
[pygr.git] / pygr / seqfmt.pyx
blobf67e4510a8e05eb6f5f46ab7ebdde0b85cff3fcb
2 cdef extern from "stdio.h":
3 ctypedef struct FILE:
4 pass
5 FILE *fopen(char *,char *)
6 FILE *fdopen(int,char *)
7 int fclose(FILE *)
8 int sscanf(char *str,char *fmt,...)
9 int sprintf(char *str,char *fmt,...)
10 char *fgets(char *str,int size,FILE *ifile)
11 int fputc(int,FILE *)
13 cdef extern from "ctype.h":
14 int isspace(int)
15 int isprint(int)
17 cdef extern from "string.h":
18 char *strcpy(char *,char *)
def read_fasta_lengths(d, pyfile, filename):
    '''Record the length and offset of every sequence in a FASTA file.

    Reads FASTA text through the file descriptor of pyfile.  Every
    printable, non-whitespace character found on a non-header line is
    appended to the file filename + '.pureseq'.  For each sequence that
    contributed at least one such character, stores
        d[seq_id] = (length, offset)
    where seq_id is the first whitespace-delimited word following '>' on
    the header line, length is the number of characters written for that
    sequence, and offset is the position of its first character in the
    .pureseq file.  Sequences with no characters get no entry in d.

    d        -- mapping that receives the seq_id -> (length, offset) items
    pyfile   -- open Python file object; only its fileno() is used here
    filename -- used in error messages and to name the .pureseq output

    Raises IOError if a C stream cannot be attached to pyfile, or if the
    .pureseq file cannot be created or written.  Raises IndexError if a
    header line has nothing but whitespace after the '>'.

    NOTE(review): lines are read in chunks of at most 32766 characters, so
    the tail of a longer header line is processed as sequence data.
    '''
    cdef int i, c, close_status
    cdef long long seqLength, ipos, offset  # MUST USE 64-BIT INT!!!
    cdef char tmp[32768], fastastart[4], *p
    cdef FILE *ifile, *ifile2

    # Wrap the Python file's descriptor in a C stream.  This stream is
    # deliberately never fclose()d: that would also close the descriptor
    # that pyfile still owns.
    # NOTE(review): presumably pyfile must be freshly opened (nothing read
    # through it yet), since Python-side buffering is bypassed -- confirm.
    ifile = fdopen(pyfile.fileno(), 'r')
    if ifile == NULL:
        raise IOError('unable to open %s' % filename)
    outfile = filename + '.pureseq'
    ifile2 = fopen(outfile, 'wb')  # binary mode, though it shouldn't matter
    if ifile2 == NULL:
        raise IOError('unable to create %s' % outfile)

    seq_id = None   # identifier of the sequence currently being read
    ipos = 0        # number of characters written to the .pureseq file
    seqLength = 0   # characters written for the current sequence
    offset = 0      # value of ipos when the current sequence started
    strcpy(fastastart, '>')  # the header marker, held as a C char
    try:
        p = fgets(tmp, 32767, ifile)  # read the first line of the FASTA file
        while p != NULL:
            if fastastart[0] == p[0]:  # header line: a new sequence begins
                if seq_id is not None and seqLength > 0:
                    d[seq_id] = seqLength, offset  # save the finished one
                seq_id = str(p + 1).split()[0]
                offset = ipos
                seqLength = 0
            else:
                i = 0
                while p[i]:
                    # The <ctype.h> functions are only defined for values
                    # representable as unsigned char (or EOF), so never
                    # hand them a possibly negative plain char.
                    c = <unsigned char>p[i]
                    if isprint(c) and not isspace(c):
                        seqLength = seqLength + 1
                        # fputc returns EOF (a negative int) on failure
                        if fputc(c, ifile2) < 0:
                            raise IOError('error writing %s' % outfile)
                        ipos = ipos + 1
                    i = i + 1
            p = fgets(tmp, 32767, ifile)  # read the next line
        if seq_id is not None and seqLength > 0:
            d[seq_id] = seqLength, offset  # save the last sequence
    finally:
        # Always release the output stream, even when parsing raised.
        close_status = fclose(ifile2)
    # fclose flushes buffered output; a failure here means data was lost.
    if close_status != 0:
        raise IOError('error writing %s' % outfile)