5 use enum
qw(:u_ refmethod refsource refgroup refseq refstart refstop refscore refstrand refphase qrystart qrystop sizes starts);
6 use enum
qw(:v_ refmethod refsource refgroup refseq refstrand refscore refphase txstart txstop cdsstart cdsstop exonstarts exonstops);
8 use enum
qw(:all_bacends__ x matches misMatches repMatches nCount qNumInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
9 use enum
qw(:all_est__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
10 use enum
qw(:all_mrna__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
11 use enum
qw(:all_sts_primer__ matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
12 use enum
qw(:all_sts_seq__ matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
13 use enum
qw(:bacEndPairs__ bin chrom chromStart chromEnd name score strand pslTable lfCount lfStarts lfSizes lfNames);
14 use enum
qw(:blatFish__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
15 use enum
qw(:gap__ bin chrom chromStart chromEnd ix n size type bridge);
16 use enum
qw(:gl__ bin frag start end strand);
17 use enum
qw(:gold__ bin chrom chromStart chromEnd ix type frag fragStart fragEnd strand);
18 use enum
qw(:intronEst__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
19 use enum
qw(:mrna__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
20 use enum
qw(:rmsk__ bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEnd repLeft id);
21 use enum
qw(:clonePos__ name seqSize phase chrom chromStart chromEnd stage faFile);
22 use enum
qw(:ctgPos__ contig size chrom chromStart chromEnd);
23 use enum
qw(:cytoBand__ chrom chromStart chromEnd name gieStain);
24 use enum
qw(:fishClones__ chrom chromStart chromEnd name score placeCount bandStarts bandEnds labs placeType accCount accNames stsCount stsNames beCount beNames);
25 use enum
qw(:gcPercent__ chrom chromStart chromEnd name gcPpt);
26 use enum
qw(:genscan__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
27 use enum
qw(:genscanSubopt__ bin chrom chromStart chromEnd name score strand);
28 use enum
qw(:jaxOrtholog__ humanSymbol humanBand mgiId mouseSymbol mouseChr mouseCm mouseBand);
29 use enum
qw(:refGene__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
30 use enum
qw(:refLink__ name product mrnaAcc protAcc geneName prodName locusLinkID omimId);
31 use enum
qw(:refSeqAli__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
32 use enum
qw(:simpleRepeat__ bin chrom chromStart chromEnd name period copyNum consensusSize perMatch perIndel score A C G T entropy sequence);
33 use enum
qw(:stsAlias__ alias identNo trueName);
34 use enum
qw(:stsInfo__ identNo name gbCount genbank gdbCount gdb nameCount otherNames dbSTSid otherDbstsCount otherDbSTS leftPrimer rightPrimer distance organism sequence otherUCSCcount otherUCSC mergeUCSCcount mergeUCSC genethonName genethonChr genethonPos genethonLOD marshfieldName marshfieldChr marshfieldPos marshfieldLOD wiyacName wiyacChr wiyacPos wiyacLOD wirhName wirhChr wirhPos wirhLOD gm99gb4Name gm99gb4Chr gm99gb4Pos gm99gb4LOD gm99g3Name gm99g3Chr gm99g3Pos gm99g3LOD tngName tngChr tngPos tngLOD);
35 use enum
qw(:stsMap__ chrom chromStart chromEnd name score identNo ctgAcc otherAcc genethonChrom genethonPos marshfieldChrom marshfieldPos gm99Gb4Chrom gm99Gb4Pos shgcTngChrom shgcTngPos shgcG3Chrom shgcG3Pos wiYacChrom wiYacPos wiRhChrom wiRhPos fishChrom beginBand endBand lab);
36 use enum
qw(:uniGene_2__ bin chrom chromStart chromEnd name score strand txStart txEnd reserved exonCount exonStarts exonEnds);
37 ###############################################
39 ###############################################
42 my %nolandmark = map {$_=>1} qw(gap cpgIsland recombRate_decode recombRate_marshfield recombRate_genethon
43 humMusL zoom1_humMusL zoom50_humMusL zoom2500_humMusL
44 genscanSubopt simpleRepeat snpNih snpTsc
47 foreach my $filename (@ARGV){
48 my $newfilename = $filename;
49 $newfilename =~ s/txt\.gz/gff/;
50 open(my $fhi, "zcat $filename |");
51 open(my $fho, ">$newfilename");
53 while(my $line = <$fhi>){
55 #these three should work the same way as unigene, but the fields are different order
56 # $filename =~ /affyRatio/ ? toGFF($line,$fho,['affyRatio', '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
57 # $filename =~ /nci60/ ? toGFF($line,$fho,['nci60', '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
58 # $filename =~ /rnaCluster/ ? toGFF($line,$fho,['rnaCluster', '', 4, 1, 2, 3, 5, 6,-1, 7, 8,11,12]) :
60 #these two are not yet handled
61 # $filename =~ /cpgIsland/ ? toGFF($line,$fho,['cpgIsland', '', 3, 0, 1, 2,-1,-1,-1]) :
62 # $filename =~ /estOrientInfo/ ? toGFF($line,$fho,['estOrientInfo', '',]) :
65 $filename =~ /uniGene_2/ ? toGFF
($line,$fho,['uniGene_2', '', 4, 1, 2, 3, 5, 6,-1,-1,-1,11,12]) :
67 $filename =~ /all_bacends/ ? toGFF
($line,$fho,['bacends', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
68 $filename =~ /all_est/ ? toGFF
($line,$fho,['est', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
69 $filename =~ /all_mrna/ ? toGFF
($line,$fho,['mrna', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
70 $filename =~ /all_sts_primer/ ? toGFF
($line,$fho,['sts_primer', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
71 $filename =~ /all_sts_seq/ ? toGFF
($line,$fho,['sts_seq', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
72 $filename =~ /blastzBestMouse/ ? toGFF
($line,$fho,['blastzBestMouse', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
73 $filename =~ /blastzMm2/ ? toGFF
($line,$fho,['blastzMm2', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
74 $filename =~ /blastzTightMouse/ ? toGFF
($line,$fho,['blastzTightMouse', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
75 $filename =~ /blatFish/ ? toGFF
($line,$fho,['blatFish', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
76 $filename =~ /chimpBac/ ? toGFF
($line,$fho,['chimpBac', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
77 $filename =~ /chimpBlat/ ? toGFF
($line,$fho,['chimpBlat', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
78 $filename =~ /clonePos/ ? toGFF
($line,$fho,['clonePos', '', 0, 3, 4, 5,-1,-1, 2]) :
79 $filename =~ /ctgPos/ ? toGFF
($line,$fho,['ctgPos', '', 0, 2, 3, 4,-1,-1,-1]) :
80 $filename =~ /cytoBand/ ? toGFF
($line,$fho,['cytoBand', '', 3, 0, 1, 2,-1,-1,-1]) :
81 $filename =~ /est/ ? toGFF
($line,$fho,['est', '',10,14,16,17,-1,9,-1,12,13,19,21]) :
82 $filename =~ /fishClones/ ? toGFF
($line,$fho,['fishClones', '', 3, 0, 1, 2, 4,-1,-1]) :
83 $filename =~ /gap/ ? toGFF
($line,$fho,['gap', '', 7, 1, 2, 3,-1,-1,-1]) :
84 $filename =~ /gcPercent/ ? toGFF
($line,$fho,['gcPercent', '', 3, 0, 1, 2, 4,-1,-1]) :
85 $filename =~ /genMapDb/ ? toGFF
($line,$fho,['genMapDb', '', 3, 0, 1, 2, 4, 5,-1]) :
86 $filename =~ /genscanSubopt/ ? toGFF
($line,$fho,['genscanSubopt', '', 4, 1, 2, 3, 5, 6,-1]) :
87 $filename =~ /gold/ ? toGFF
($line,$fho,['gold', '', 6, 1, 2, 3,-1, 9,-1, 7, 8]) :
88 $filename =~ /intronEst/ ? toGFF
($line,$fho,['intron_est', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
89 $filename =~ /recombRate/ ?
eval { toGFF
($line,$fho,['recombRate_decode', '', 3, 0, 1, 2, 4,-1,-1]);
90 toGFF
($line,$fho,['recombRate_marshfield', '', 3, 0, 1, 2, 7,-1,-1]);
91 toGFF
($line,$fho,['recombRate_genethon', '', 3, 0, 1, 2,10,-1,-1]); } :
93 $filename =~ /refSeqAli/ ? toGFF
($line,$fho,['refSeqAli', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
94 $filename =~ /rmsk/ ? toGFF
($line,$fho,['rmsk', '',10, 5, 6, 7,-1, 9,-1,13,14]) :
95 $filename =~ /simpleRepeat/ ? toGFF
($line,$fho,['simpleRepeat', '', 4, 1, 2, 3,10,-1,-1]) :
96 $filename =~ /snpNih/ ? toGFF
($line,$fho,['snpNih', '', 4, 1, 2, 3]) :
97 $filename =~ /snpTsc/ ? toGFF
($line,$fho,['snpTsc', '', 4, 1, 2, 3]) :
98 $filename =~ /stsMap/ ? toGFF
($line,$fho,['stsMap', '', 3, 0, 1, 2, 4,-1,-1]) :
99 $filename =~ /xenoEst/ ? toGFF
($line,$fho,['xenoEst', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
100 $filename =~ /xenoMrna/ ? toGFF
($line,$fho,['xenoMrna', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
101 $filename =~ /zoom1_humMusL/ ? toGFF
($line,$fho,['zoom1_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
102 $filename =~ /zoom2500_humMusL/ ? toGFF
($line,$fho,['zoom2500_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
103 $filename =~ /zoom50_humMusL/ ? toGFF
($line,$fho,['zoom50_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
104 $filename =~ /humMusL/ ? toGFF
($line,$fho,['humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
106 $filename =~ /(refGene|genscan|acembly|ensGene|refFlat|sanger22pseudo|sanger22|softberryGene|twinscan)/ ?
108 [$1, -1, 0, 2, 3, -1, -1, 4, 5, 6, 7, 9, 10]) :
117 ###############################################
118 # begin filetype-specific subroutines
119 ###############################################
122 my($line,$fho, $maps) = @_;
124 chomp $line; my @fields = split /\t/, $line;
126 if(!$nolandmark{render
($maps->[v_refmethod
],\
@fields)}){
127 print $fho join "\t", map {render
($maps->[$_],\
@fields)} (v_refseq
,
136 print $fho "Sequence " . render
($maps->[v_refgroup
],\
@fields);
140 if(!$nolandmark{render
($maps->[v_refmethod
],\
@fields)}){
141 print $fho join "\t", map {render
($maps->[$_],\
@fields)} (v_refseq
,
150 print $fho "CDS " . render
($maps->[v_refgroup
],\
@fields);
154 if(defined($maps->[v_exonstarts
]) and defined($maps->[v_exonstops
])){
155 my @starts = split /,/, render
($maps->[v_exonstarts
],\
@fields);
156 my @stops = split /,/, render
($maps->[v_exonstops
],\
@fields);
158 while(my $start = shift @starts){
159 my $stop = shift @stops;
160 print $fho join "\t", (render
($maps->[v_refseq
],\
@fields),
161 render
($maps->[v_refsource
],\
@fields),
162 render
($maps->[v_refmethod
],\
@fields),
165 render
($maps->[v_refscore
],\
@fields),
166 render
($maps->[v_refstrand
],\
@fields),
167 render
($maps->[v_refphase
],\
@fields),
168 render
($maps->[v_refmethod
],\
@fields) . " " . render
($maps->[v_refgroup
],\
@fields)
176 my($line,$fho, $maps) = @_;
178 chomp $line; my @fields = split /\t/, $line;
180 if(!$maps->[u_qrystart
] and !$nolandmark{render
($maps->[u_refmethod
],\
@fields)}){
181 print $fho join "\t", map {render
($maps->[$_],\
@fields)} (u_refseq
,
190 print $fho "Sequence " . render
($maps->[u_refgroup
],\
@fields);
193 print $fho join "\t", map {render
($maps->[$_],\
@fields)} (u_refseq
,
202 if($maps->[u_qrystart
] >= 0){
203 print $fho "Target:" . render
($maps->[u_refmethod
],\
@fields) . " ";
204 print $fho render
($maps->[u_refgroup
],\
@fields) . " " .
205 render
($maps->[u_qrystart
],\
@fields) . " " .
206 render
($maps->[u_qrystop
], \
@fields);
208 print $fho "Sequence " . render
($maps->[u_refgroup
],\
@fields) . " ";
212 if(defined($maps->[u_starts
]) and defined($maps->[u_sizes
])){
213 my @starts = split /,/, render
($maps->[u_starts
],\
@fields);
214 my @sizes = split /,/, render
($maps->[u_sizes
],\
@fields);
217 while(defined($start = shift @starts)){
218 my $size = shift @sizes;
220 if($maps->[u_qrystart
] < 1 and $maps->[u_qrystop
] < 1){
221 print $fho join "\t", (render
($maps->[u_refseq
],\
@fields),
222 render
($maps->[u_refsource
],\
@fields),
223 render
($maps->[u_refmethod
],\
@fields),
224 render
($maps->[u_refstart
],\
@fields) + $start,
225 render
($maps->[u_refstart
],\
@fields) + $start + $size,
226 render
($maps->[u_refscore
],\
@fields),
227 render
($maps->[u_refstrand
],\
@fields),
228 render
($maps->[u_refphase
],\
@fields),
229 render
($maps->[u_refmethod
],\
@fields) . " " . render
($maps->[u_refgroup
],\
@fields)
232 print $fho join "\t", (render
($maps->[u_refseq
],\
@fields),
233 render
($maps->[u_refsource
],\
@fields),
234 render
($maps->[u_refmethod
],\
@fields),
237 render
($maps->[u_refscore
],\
@fields),
238 render
($maps->[u_refstrand
],\
@fields),
239 render
($maps->[u_refphase
],\
@fields),
240 render
($maps->[u_refmethod
],\
@fields) . " " . render
($maps->[u_refgroup
],\
@fields)
248 my($index,$fields) = @_;
249 return '.' if $index == -1;
250 return $index unless $index =~ /^\d+$/;
251 return $fields->[$index];