* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / FeatureIO / gff.pm
blobf98f97c0e500f14db2c3a6cf99c0f70035b4f04b
1 =pod
3 =head1 NAME
5 Bio::FeatureIO::gff - read/write GFF feature files
7 =head1 SYNOPSIS
9 my $feature; #get a Bio::SeqFeature::Annotated somehow
10 my $featureOut = Bio::FeatureIO->new(
11 -format => 'gff',
12 -version => 3,
13 -fh => \*STDOUT,
14 -validate_terms => 1, #boolean. validate ontology terms online? default 0 (false).
16 $featureOut->write_feature($feature);
18 =head1 DESCRIPTION
20 Currently implemented:
22 version read? write?
23 ------------------------------
24 GFF 1 N N
25 GFF 2 N N
26 GFF 2.5 (GTF) N Y
27 GFF 3 Y Y
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to
35 the Bioperl mailing list. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_list - About the mailing lists
40 =head2 Reporting Bugs
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted via
44 the web:
46 http://bugzilla.open-bio.org/
48 =head1 AUTHOR
50 Allen Day, <allenday@ucla.edu>
52 =head1 CONTRIBUTORS
54 Steffen Grossmann, <grossman@molgen.mpg.de>
55 Scott Cain, <cain@cshl.edu>
56 Rob Edwards <rob@salmonella.org>
58 =head1 APPENDIX
60 The rest of the documentation details each of the object methods.
61 Internal methods are usually preceded with a _
63 =cut
66 # Let the code begin...
68 package Bio::FeatureIO::gff;
69 use strict;
71 #these are alphabetical, keep them that way.
72 use Bio::Annotation::DBLink;
73 use Bio::Annotation::OntologyTerm;
74 use Bio::Annotation::SimpleValue;
75 use Bio::Annotation::Target;
76 use Bio::FeatureIO;
77 use Bio::Ontology::OntologyStore;
78 use Bio::OntologyIO;
79 use Bio::SeqFeature::Annotated;
80 use Bio::SeqIO;
81 use URI::Escape;
83 use base qw(Bio::FeatureIO);
85 use constant DEFAULT_VERSION => 3;
86 my $RESERVED_TAGS = "ID|Name|Alias|Parent|Target|Gap|Derives_from|Note|Dbxref|dbxref|Ontology_term|Index|CRUD";
88 sub _initialize {
89 my($self,%arg) = @_;
91 $self->SUPER::_initialize(%arg);
93 $self->version( $arg{-version} || DEFAULT_VERSION);
94 $self->validate($arg{-validate_terms} || 0);
96 if ($arg{-file} =~ /^>.*/ ) {
97 $self->_print("##gff-version " . $self->version() . "\n");
99 else {
100 my $directive;
101 while(($directive = $self->_readline()) && ($directive =~ /^##/) ){
102 $self->_handle_directive($directive);
104 $self->_pushback($directive);
107 #need to validate against SOFA, no SO
108 if ($self->validate) {
109 $self->so(
110 Bio::Ontology::OntologyStore->get_ontology('Sequence Ontology Feature Annotation')
115 =head2 next_feature()
117 Usage : my $feature = $featureio->next_feature();
118 Function: reads a feature record from a GFF stream and returns it as an object.
119 Returns : a Bio::SeqFeature::Annotated object
120 Args : N/A
122 =cut
124 sub next_feature {
125 my $self = shift;
126 my $gff_string;
128 my($f) = $self->_buffer_feature();
129 if($f){
130 return $f;
133 return if $self->fasta_mode();
135 # be graceful about empty lines or comments, and make sure we return undef
136 # if the input is consumed
137 while(($gff_string = $self->_readline()) && defined($gff_string)) {
138 next if $gff_string =~ /^\s*$/; #skip blank lines
139 next if $gff_string =~ /^\#[^#]/; #skip comments, but not directives
140 last;
143 return unless $gff_string;
145 # looks like we went into FASTA mode without a directive.
146 if($gff_string =~ /^>/){
147 $self->_pushback($gff_string);
148 $self->fasta_mode(1);
149 return;
152 # got a directive
153 elsif($gff_string =~ /^##/){
154 $self->_handle_directive($gff_string);
155 # recurse down to the next line. this will bottom out on finding a real feature or EOF
156 return $self->next_feature();
159 # got a feature
160 else {
161 return $self->_handle_feature($gff_string);
165 =head2 next_feature_group
167 Title : next_feature_group
168 Usage : @feature_group = $stream->next_feature_group
169 Function: Reads the next feature_group from $stream and returns it.
171 Feature groups in GFF3 files are separated by '###' directives. The
172 features in a group might form a hierarchical structure. The
173 complete hierarchy of features is returned, i.e. the returned array
174 represents only the top-level features. Lower-level features can
175 be accessed using the 'get_SeqFeatures' method recursively.
177 Example : # getting the complete hierarchy of features in a GFF3 file
178 my @toplevel_features;
179 while (my @fg = $stream->next_feature_group) {
180 push(@toplevel_features, @fg);
182 Returns : an array of Bio::SeqFeature::Annotated objects
183 Args : none
185 =cut
187 sub next_feature_group {
188 my $self = shift;
190 my $feat;
191 my %seen_ids;
192 my @all_feats;
193 my @toplevel_feats;
195 $self->{group_not_done} = 1;
197 while ($self->{group_not_done} && ($feat = $self->next_feature()) && defined($feat)) {
198 # we start by collecting all features in the group and
199 # memorizing those which have an ID attribute
200 my $anno_ID = $feat->get_Annotations('ID');
201 if(ref($anno_ID)) {
202 my $attr_ID = $anno_ID->value;
203 $self->throw("Oops! ID $attr_ID exists more than once in your file!")
204 if (exists($seen_ids{$attr_ID}));
205 $seen_ids{$attr_ID} = $feat;
207 push(@all_feats, $feat);
210 # assemble the top-level features
211 foreach $feat (@all_feats) {
212 my @parents = $feat->get_Annotations('Parent');
213 if (@parents) {
214 foreach my $parent (@parents) {
215 my $parent_id = $parent->value;
216 $self->throw("Parent with ID $parent_id not found!") unless (exists($seen_ids{$parent_id}));
217 $seen_ids{$parent_id}->add_SeqFeature($feat);
219 } else {
220 push(@toplevel_feats, $feat);
224 return @toplevel_feats;
227 =head2 next_seq()
229 access the FASTA section (if any) at the end of the GFF stream. note that this method
230 will return undef if not all features in the stream have been handled
232 =cut
234 sub next_seq() {
235 my $self = shift;
236 return unless $self->fasta_mode();
238 #first time next_seq has been called. initialize Bio::SeqIO instance
239 if(!$self->seqio){
240 $self->seqio( Bio::SeqIO->new(-format => 'fasta', -fh => $self->_fh()) );
242 return $self->seqio->next_seq();
245 =head2 write_feature()
247 Usage : $featureio->write_feature( Bio::SeqFeature::Annotated->new(...) );
248 Function: writes a feature in GFF format. the GFF version used is governed by the
249 '-version' argument passed to Bio::FeatureIO->new(), and defaults to GFF
250 version 3.
251 Returns : ###FIXME
252 Args : a Bio::SeqFeature::Annotated object.
254 =cut
256 sub write_feature {
257 my($self,$feature) = @_;
258 if (!$feature) {
259 $self->throw("gff.pm cannot write_feature unless you give a feature to write.\n");
261 $self->throw("only Bio::SeqFeature::Annotated objects are writeable") unless $feature->isa('Bio::SeqFeature::Annotated');
263 if($self->version == 1){
264 return $self->_write_feature_1($feature);
265 } elsif($self->version == 2){
266 return $self->_write_feature_2($feature);
267 } elsif($self->version == 2.5){
268 return $self->_write_feature_25($feature);
269 } elsif($self->version == 3){
270 return $self->_write_feature_3($feature);
271 } else {
272 $self->throw(sprintf("don't know how to write GFF version %s",$self->version));
276 ################################################################################
278 =head1 ACCESSORS
280 =cut
282 =head2 fasta_mode()
284 Usage : $obj->fasta_mode($newval)
285 Function:
286 Example :
287 Returns : value of fasta_mode (a scalar)
288 Args : on set, new value (a scalar or undef, optional)
291 =cut
293 sub fasta_mode {
294 my($self,$val) = @_;
295 $self->{'fasta_mode'} = $val if defined($val);
296 return $self->{'fasta_mode'};
299 =head2 seqio()
301 Usage : $obj->seqio($newval)
302 Function: holds a Bio::SeqIO instance for handling the GFF3 ##FASTA section.
303 Returns : value of seqio (a scalar)
304 Args : on set, new value (a scalar or undef, optional)
307 =cut
309 sub seqio {
310 my($self,$val) = @_;
311 $self->{'seqio'} = $val if defined($val);
312 return $self->{'seqio'};
315 =head2 sequence_region()
317 Usage :
318 Function: ###FIXME
319 Returns :
320 Args :
323 =cut
325 sub sequence_region {
326 my ($self,$k,$v) = @_;
327 if(defined($k) && defined($v)){
328 $self->{'sequence_region'}{$k} = $v;
329 return $v;
331 elsif(defined($k)){
332 return $self->{'sequence_region'}{$k};
334 else {
335 return;
340 =head2 so()
342 Usage : $obj->so($newval)
343 Function: holds a Sequence Ontology instance
344 Returns : value of so (a scalar)
345 Args : on set, new value (a scalar or undef, optional)
347 =cut
349 sub so {
350 my $self = shift;
351 my $val = shift;
352 ###FIXME validate $val object's type
353 $self->{so} = $val if defined($val);
354 return $self->{so};
357 =head2 validate()
359 Usage : $obj->validate($newval)
360 Function: true if encountered ontology terms in next_feature()
361 mode should be validated.
362 Returns : value of validate (a scalar)
363 Args : on set, new value (a scalar or undef, optional)
366 =cut
368 sub validate {
369 my($self,$val) = @_;
370 $self->{'validate'} = $val if defined($val);
371 return $self->{'validate'};
374 =head2 version()
376 Usage : $obj->version($newval)
377 Function: version of GFF to read/write. valid values are 1, 2, 2.5, and 3.
378 Returns : value of version (a scalar)
379 Args : on set, new value (a scalar or undef, optional)
381 =cut
383 sub version {
384 my $self = shift;
385 my $val = shift;
386 my %valid = map {$_=>1} (1, 2, 2.5, 3);
387 if(defined $val && $valid{$val}){
388 return $self->{'version'} = $val;
390 elsif(defined($val)){
391 $self->throw('invalid version. valid versions: '.join(' ', sort keys %valid));
393 return $self->{'version'};
396 ################################################################################
398 =head1 INTERNAL METHODS
400 =cut
402 =head2 _buffer_feature()
404 Usage :
405 Function: ###FIXME
406 Returns :
407 Args :
409 =cut
411 sub _buffer_feature {
412 my ($self,$f) = @_;
414 if ( $f ) {
415 push @{ $self->{'buffer'} }, $f;
416 return $f;
418 elsif ( $self->{'buffer'} ) {
419 return shift @{ $self->{'buffer'} };
421 else {
422 return;
427 =head1 _handle_directive()
429 this method is called for lines beginning with '##'.
431 =cut
433 sub _handle_directive {
434 my($self,$directive_string) = @_;
436 $directive_string =~ s/^##//; #remove escape
437 my($directive,@arg) = split /\s+/, $directive_string;
439 if($directive eq 'gff-version'){
440 my $version = $arg[0];
441 if($version != 3){
442 $self->throw("this is not a gff version 3 document, it is version '$version'");
446 elsif($directive eq 'sequence-region'){
447 # RAE: Sequence regions are in the format sequence-region seqid start end
448 # for these we want to store the seqid, start, and end. Then when we validate
449 # we want to make sure that the features are within the seqid/start/end
451 $self->throw('Both start and end for sequence region should be defined')
452 unless $arg[1] && $arg[2];
453 my $fta = Bio::Annotation::OntologyTerm->new();
454 $fta->name( 'region');
456 my $f = Bio::SeqFeature::Annotated->new();
457 $f->seq_id( $arg[0] );
458 $f->start( $arg[1] );
459 $f->end( $arg[2] );
461 $f->type( $fta );
463 #cache this in sequence_region(), we may need it for validation later.
464 $self->sequence_region($f->seq_id => $f);
466 #NOTE: is this the right thing to do -- treat this as a feature? -allenday
467 #buffer it to be returned by next_feature()
468 $self->_buffer_feature($f);
471 elsif($directive eq 'feature-ontology'){
472 $self->warn("'##$directive' directive handling not yet implemented");
475 elsif($directive eq 'attribute-ontology'){
476 $self->warn("'##$directive' directive handling not yet implemented");
479 elsif($directive eq 'source-ontology'){
480 $self->warn("'##$directive' directive handling not yet implemented");
483 elsif($directive eq 'FASTA' or $directive =~ /^>/){
484 #next_seq() will take care of this.
485 $self->fasta_mode(1);
486 return;
489 elsif($directive eq '#'){
490 #all forward references resolved
491 $self->{group_not_done} = 0;
494 elsif($directive eq 'organism') {
495 my $organism = $arg[0];
496 $self->organism($organism);
499 else {
500 $self->throw("don't know what do do with directive: '##".$directive."'");
504 =head1 _handle_feature()
506 this method is called for each line not beginning with '#'. it parses the line and returns a
507 Bio::SeqFeature::Annotated object.
509 =cut
511 sub _handle_feature {
512 my($self,$feature_string) = @_;
514 my $feat = Bio::SeqFeature::Annotated->new();
516 my($seq,$source,$type,$start,$end,$score,$strand,$phase,$attribute_string) = split /\t/, $feature_string;
518 $feat->seq_id($seq);
519 $feat->source_tag($source);
520 $feat->start($start) unless $start eq '.';
521 $feat->end($end) unless $end eq '.';
522 $feat->strand($strand eq '+' ? 1 : $strand eq '-' ? -1 : 0);
523 $feat->score($score);
524 $feat->phase($phase);
526 my $fta = Bio::Annotation::OntologyTerm->new();
528 if($self->validate()){
529 # RAE Added a couple of validations based on the GFF3 spec at http://song.sourceforge.net/gff3.shtml
530 # 1. Validate the id
531 if ($seq =~ /[^a-zA-Z0-9\.\-\:\^\*\$\@\!\+\_\?]/) { # I just escaped everything.
532 $self->throw("Validation Error: seqid ($seq) contains characters that are not [a-zA-Z0-9.:^*\$\@!+_?\-] and not escaped");
535 if ($seq =~ /\s/) {
536 $self->throw("Validation Error: seqid ($seq) contains unescaped whitespace")
539 # NOTE i think we're handling this in as a directive, and this test may be removed -allenday
540 if ($seq =~ /^>/) {
541 $self->throw("Validation Error: seqid ($seq) begins with a >")
544 # 2. Validate the starts and stops.
545 # these need to be within the region's bounds, and
546 # also start <= end. bail out if either is not true.
547 if ($start > $end) {
548 $self->throw("Validation Error: start ($start) must be less than or equal to end in $seq");
551 my $region = $self->sequence_region($seq);
552 # NOTE: we can only validate against sequence-region that are declared in the file.
553 # if i reference some region from elsewhere, can't validate. if we want to be really strict
554 # we should bail out here. -allenday
555 if ( defined($region) && $start < $region->start() || $end > $region->end() ) {
556 $self->throw("Validation Error: sequence location ($seq from $start to $end) does not appear to lie within a defined sequence-region")
559 # 3. Validate the strand.
560 # In the unvalidated version +=1 and -=-1. Everything else is 0. We just need to warn when it is not [+-.?]
561 $self->throw("Validation Error: strand is not one of [+-.?] at $seq") if ($strand =~ /^[^\+\-\.\?]$/);
563 # 4. Validate the phase to be one of [.012]
564 $self->throw("Validation Error: phase is not one of [.012] at $seq") if ($phase =~ /^[^\.012]$/);
566 my $feature_type;
567 if($type =~ /^\D+:\d+$/){
568 #looks like an identifier
569 ($feature_type) = $self->so->find_terms(-identifier => $type);
570 } else {
571 #looks like a name
572 ($feature_type) = $self->so->find_terms(-name => $type);
575 if(!$feature_type){
576 $self->throw("Validation Error: couldn't find ontology term for '$type'.");
578 $fta->term($feature_type);
579 } else {
581 if($type =~ /^\D+:\d+$/){
582 #looks like an identifier
583 $fta->identifier($type)
584 } else {
585 $fta->name($type);
589 $feat->type($fta);
591 my %attr = ();
592 chomp $attribute_string;
594 unless ( $attribute_string eq '.' ) {
595 my @attributes = split ';', $attribute_string;
596 foreach my $attribute (@attributes){
597 my($key,$values) = split '=', $attribute;
599 # remove leading and trailing quotes from values
600 $values =~ s/^["']//;
601 $values =~ s/["']$//; #' terminate the quote for emacs
603 my @values = map{uri_unescape($_)} split ',', $values;
605 #minor hack to allow for multiple instances of the same tag
606 if ($attr{$key}) {
607 my @tmparray = @{$attr{$key}};
608 push @tmparray, @values;
609 $attr{$key} = [@tmparray];
610 } else {
611 $attr{$key} = [@values];
616 #Handle Dbxref attributes
617 if($attr{Dbxref} or $attr{dbxref}){
618 foreach my $value (@{ $attr{Dbxref} }, @{ $attr{dbxref} }){
619 my $a = Bio::Annotation::DBLink->new();
620 my($db,$accession) = $value =~ /^(.+?):(.+)$/;
622 if(!$db or !$accession){ #dbxref malformed
623 $self->throw("Error in line:\n$feature_string\nDbxref value '$value' did not conform to GFF3 specification");
624 next;
627 $a->database($db);
628 $a->primary_id($accession);
629 $feat->add_Annotation('Dbxref',$a);
633 #Handle Ontology_term attributes
634 if($attr{Ontology_term}){
635 foreach my $id (@{ $attr{Ontology_term} }){
636 my $a = Bio::Annotation::OntologyTerm->new();
638 if($self->validate()){
639 my $ont_name = Bio::Ontology::OntologyStore->guess_ontology($id);
640 my $ont = Bio::Ontology::OntologyStore->get_ontology($ont_name);
641 my($term) = $ont->find_terms(-identifier => $id);
642 $a->term($term);
643 } else {
644 $a->identifier($id);
647 $feat->add_Annotation('Ontology_term',$a);
651 #Handle Gap attributes
652 if($attr{Gap}){
653 for my $value (@{ $attr{Gap} }) {
654 my $a = Bio::Annotation::SimpleValue->new();
655 $a->value($value);
656 $feat->add_Annotation('Gap',$a);
660 #Handle Target attributes
661 if($attr{Target}){
662 my $target_collection = Bio::Annotation::Collection->new();
664 foreach my $target_string (@{ $attr{Target} } ) {
666 #only replace + for space if + has been used in place of it
667 #that is, + could also mean plus strand, and we don't want
668 #to accidentally remove it
670 #presumably you can't use + for space and + for strand in the same string.
671 $target_string =~ s/\+/ /g unless $target_string =~ / /;
673 my ($t_id,$tstart,$tend,$strand,$extra) = split /\s+/, $target_string;
674 if (!$tend || $extra) { # too much or too little stuff in the string
675 $self->throw("The value in the Target string, $target_string, does not conform to the GFF3 specification");
678 my $a = Bio::Annotation::Target->new(
679 -target_id => $t_id,
680 -start => $tstart,
681 -end => $tend,
684 if ($strand && $strand eq '+') {
685 $strand = 1;
686 } elsif ($strand && $strand eq '-') {
687 $strand = -1;
688 } else {
689 $strand = '';
692 $a->strand($strand) if $strand;
693 $feat->add_Annotation('Target',$a);
697 #Handle ID attribute. May only have one ID, throw error otherwise
699 if($attr{ID}){
700 if(scalar( @{ $attr{ID} } ) > 1){
701 $self->throw("Error in line:\n$feature_string\nA feature may have at most one ID value");
704 #ID's must be unique in the file
705 if ($self->{'allIDs'}->{${$attr{ID}}[0]} && $self->validate()) {
706 $self->throw("Validation Error: The ID ${$attr{ID}}[0] occurs more than once in the file, but should be unique");
708 $self->{'allIDs'}->{${$attr{ID}}[0]} = 1;
711 my $a = Bio::Annotation::SimpleValue->new();
712 $a->value( @{ $attr{ID} }[0] );
713 $feat->add_Annotation('ID',$a);
716 #Handle Name attribute. May only have one Name, throw error otherwise
717 if($attr{Name}){
718 if(scalar( @{ $attr{Name} } ) > 1){
719 $self->throw("Error in line:\n$feature_string\nA feature may have at most one Name value");
722 my $a = Bio::Annotation::SimpleValue->new();
723 $a->value( @{ $attr{Name} }[0] );
724 $feat->add_Annotation('Name',$a);
727 foreach my $other_canonical (qw(Alias Parent Note Derives_from Index CRUD)){
728 if($attr{$other_canonical}){
729 foreach my $value (@{ $attr{$other_canonical} }){
730 my $a = Bio::Annotation::SimpleValue->new();
731 $a->value($value);
732 $feat->add_Annotation($other_canonical,$a);
737 my @non_reserved_tags = grep {/^[a-z]/} keys %attr;
738 foreach my $non_reserved_tag (@non_reserved_tags) {
739 next if ($non_reserved_tag eq 'dbxref');
740 foreach my $value (@{ $attr{$non_reserved_tag} }){
741 $feat = $self->_handle_non_reserved_tag($feat,$non_reserved_tag,$value);
745 my @illegal_tags = grep
746 {!/($RESERVED_TAGS)/}
747 grep {/^[A-Z]/} keys %attr;
749 if (@illegal_tags > 0) {
750 my $tags = join(", ", @illegal_tags);
751 $self->throw("The following tag(s) are illegal and are causing this parser to die: $tags");
754 return $feat;
757 =head2 _handle_non_reserved_tag()
759 Usage : $self->_handle_non_reserved_tag($feature,$tag,$value)
760 Function: Deal with non-reserved word tags in the ninth column
761 Returns : An updated Bio::SeqFeature::Annotated object
762 Args : A Bio::SeqFeature::Annotated and a tag/value pair
764 Note that this method can be overridden in a subclass to provide
765 special handling of non-reserved word tags.
767 =cut
769 sub _handle_non_reserved_tag {
770 my $self = shift;
771 my ($feat,$tag,$value) = @_;
773 # to customize through subclassing and overriding:
774 #if ($tag eq 'someTagOfInterest') {
775 # do something different
776 # else { do what is below
778 my $a;
779 if ($tag eq 'comment') {
780 $a = Bio::Annotation::Comment->new();
782 else {
783 $a = Bio::Annotation::SimpleValue->new();
785 $a->value($value);
786 $feat->add_Annotation($tag,$a);
788 return $feat;
791 =head1 organims
793 Gets/sets the organims from the organism directive
795 =cut
797 sub organism {
798 my $self = shift;
799 my $organism = shift if defined(@_);
800 return $self->{'organism'} = $organism if defined($organism);
801 return $self->{'organism'};
805 =head1 _write_feature_1()
807 write a feature in GFF v1 format. currently not implemented.
809 =cut
811 sub _write_feature_1 {
812 my($self,$feature) = @_;
813 $self->throw(sprintf("write_feature unimplemented for GFF version %s",$self->version));
816 =head1 _write_feature_2()
818 write a feature in GFF v2 format. currently not implemented.
820 =cut
822 sub _write_feature_2 {
823 my($self,$feature) = @_;
824 $self->throw(sprintf("write_feature unimplemented for GFF version %s",$self->version));
827 =head1 _write_feature_25()
829 write a feature in GFF v2.5 (aka GTF) format.
831 =cut
833 sub _write_feature_25 {
834 my($self,$feature,$group) = @_;
836 #the top-level feature is an aggregate of all subfeatures
837 my ($transcript_id, $gene_id) = (($feature->get_Annotations('transcript_id'))[0], ($feature->get_Annotations('gene_id'))[0]);
838 if(!defined($group)){
839 $group = ($feature->get_Annotations('ID'))[0];
840 $transcript_id ||= $group;
841 $gene_id ||= $group;
845 my $seq = ref($feature->seq_id) ? $feature->seq_id->value : $feature->seq_id;
846 my $source = $feature->source->value;
847 my $type = $feature->type->name;
848 $type = 'EXON' if $type eq 'exon'; #a GTF peculiarity, incosistent with the sequence ontology.
849 my $min = $feature->start || '.';
850 my $max = $feature->end || '.';
851 my $strand = $feature->strand == 1 ? '+' : $feature->strand == -1 ? '-' : '.';
852 my $score = defined($feature->score) ? (ref($feature->score) ? $feature->score->value : $feature->score) : '.'; # score is optional
853 my $frame = defined($feature->frame) ? (ref($feature->frame) ? $feature->frame->value : $feature->frame) : (ref($feature->phase) ? $feature->phase->value : $feature->phase);
855 #these are the only valid types in a GTF document
856 if($type eq 'EXON' or $type eq 'CDS' or $type eq 'start_codon' or $type eq 'stop_codon'){
857 my $attr = sprintf('gene_id "%s"; transcript_id "%s";',$gene_id ? $gene_id->value : '',$transcript_id ? $transcript_id->value : '');
858 my $outstring = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
859 $seq,$source,$type,$min,$max,$score,$strand,$frame eq '.' ? 0 : $frame,$attr);
861 $self->_print($outstring);
864 foreach my $subfeat ($feature->get_SeqFeatures){
865 $self->_write_feature_25($subfeat,$group);
869 =head1 _write_feature_3()
871 write a feature in GFF v3 format.
873 =cut
875 sub _write_feature_3 {
876 my($self,$feature) = @_;
877 my $seq = ref($feature->seq_id) ? $feature->seq_id->value : $feature->seq_id;
878 my $source;
879 if ($feature->source()) {
880 $source = $feature->source->value;
882 else {
883 $source = $feature->source() || "unknownsource";
885 my $type;
886 if ($feature->type()) { $type = $feature->type->name; }
887 else { $type = "region"; }
888 my $min = $feature->start || '.';
889 my $max = $feature->end || '.';
890 my $strand = $feature->strand == 1 ? '+' : $feature->strand == -1 ? '-' : '.';
891 my $score = defined($feature->score) ? (ref($feature->score) ? $feature->score->value : $feature->score) : undef;
892 my $phase = $feature->phase->value;
894 my @attr;
895 if(my @v = ($feature->get_Annotations('Name'))){
896 my $vstring = join ',', map {uri_escape($_->value)} @v;
897 push @attr, "Name=$vstring";
899 if(my @v = ($feature->get_Annotations('ID'))){
900 my $vstring = join ',', map {uri_escape($_->value)} @v;
901 push @attr, "ID=$vstring";
902 $self->throw('GFF3 features may have at most one ID, feature with these IDs is invalid:\n'.$vstring) if scalar(@v) > 1;
904 if(my @v = ($feature->get_Annotations('Parent'))){
905 my $vstring = join ',', map {uri_escape($_->value)} @v;
906 push @attr, "Parent=$vstring";
908 if(my @v = ($feature->get_Annotations('dblink'))){
909 my $vstring = join ',', map {uri_escape($_->database .':'. $_->primary_id)} @v;
910 push @attr, "Dbxref=$vstring";
912 if(my @v = ($feature->get_Annotations('ontology_term'))){
913 my $vstring = join ',', map {uri_escape($_->identifier)} @v;
914 push @attr, "Ontology_term=$vstring";
916 if(my @v = ($feature->get_Annotations('comment'))){
917 my $vstring = join ',', map {uri_escape($_->text)} @v;
918 push @attr, "Note=$vstring";
920 if(my @v = ($feature->get_Annotations('Target'))){
921 my %strand_map = ( 1=>'+', 0=>'', -1=>'-', '+' => '+', '-' => '-' );
922 my $vstring = join ',', map {
923 uri_escape($_->target_id).' '.$_->start.' '.$_->end.(defined $_->strand ? ' '.$strand_map{$_->strand} : '')
924 } @v;
925 push @attr, "Target=$vstring";
928 my $attr = join ';', @attr;
930 my $outstring = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
931 $seq,$source,$type,$min,$max,$score,$strand,$phase,$attr);
933 $self->_print($outstring);
935 foreach my $subfeat ($feature->get_SeqFeatures){
936 $self->_write_feature_3($subfeat);