Sync with main trunk
[bioperl-live.git] / Bio / FeatureIO / ptt.pm
blob7a7addc0e99152338601a3d5b8282e3c7ffee901
1 =pod
3 =head1 NAME
5 Bio::FeatureIO::ptt - read features in PTT format (writing is not yet implemented)
7 =head1 SYNOPSIS
9 # read features
10 my $fin = Bio::FeatureIO->new(-file=>'genes.ptt', -format=>'ptt');
11 my @cds;
12 while (my $f = $fin->next_feature) {
13 push @cds, $f if $f->strand > 0;
16 # write features (NOT IMPLEMENTED)
17 my $fout = Bio::FeatureIO->new(-fh=>\*STDOUT, -format=>'ptt');
18 for my $f (@cds) {
19 $fout->write_feature($f);
22 =head1 DESCRIPTION
24 The PTT file format is a table of protein features.
25 It is used mainly by NCBI who produce PTT files for
26 all their published genomes found in L<ftp://ftp.ncbi.nih.gov/genomes/>.
27 It has the following format:
29 =over 4
31 =item Line 1
33 Description of sequence to which the features belong
34 eg. "Leptospira interrogans chromosome II, complete sequence - 0..358943"
36 It is usually equivalent to the DEFINITION line of a Genbank file,
37 with the length of the sequence appended. It is unclear why "0" is
38 used as the start of the range; it should be "1".
40 =item Line 2
42 Number of feature lines in the table
43 eg. "367 proteins"
45 =item Line 3
47 Column headers, tab separated
48 eg. "Location Strand Length PID Gene Synonym Code COG Product"
50 Location : "begin..end" span of feature
51 Strand : "+" or "-"
52 Length : number of amino acids excluding the stop codon
53 PID : analogous to Genbank /db_xref="GI:xxxxxxxxx"
54 Gene : analogous to Genbank /gene="xxxx"
55 Synonym : analogous to Genbank /locus_tag="xxxx"
56 Code : meaning not documented; often "-" (empty)
57 COG : CDD COG code with COG letter categories appended
58 Product : analogous to Genbank /product="xxxx"
60 =item Line 4 onwards
62 Feature lines, nine columns, tab separated, "-" used for empty fields
63 eg. "2491..3423 + 310 24217063 metF LB002 - COG0685E 5,10-methylenetetrahydrofolate reductase"
66 =back
68 =head1 FEEDBACK
70 =head2 Mailing Lists
72 User feedback is an integral part of the evolution of this and other
73 Bioperl modules. Send your comments and suggestions preferably to
74 the Bioperl mailing list. Your participation is much appreciated.
76 bioperl-l@bioperl.org - General discussion
77 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79 =head2 Reporting Bugs
81 Report bugs to the Bioperl bug tracking system to help us keep track
82 of the bugs and their resolution. Bug reports can be submitted via
83 the web:
85 http://bugzilla.open-bio.org/
87 =head1 AUTHOR - Torsten Seemann
89 Email torsten.seemann AT infotech.monash.edu.au
91 =head1 CONTRIBUTORS
93 Based on bed.pm and gff.pm by Allen Day.
95 =head1 APPENDIX
97 The rest of the documentation details each of the object methods.
98 Internal methods are usually preceded with a _
100 =cut
103 # Let the code begin...
105 package Bio::FeatureIO::ptt;
107 use strict;
108 use base qw(Bio::FeatureIO);
109 use Bio::SeqFeature::Generic;
# Field name for each zero-based, tab-separated column of a PTT feature line.
our %NAME_OF;
@NAME_OF{ 0 .. 8 } = qw(
  Location Strand Length PID Gene Synonym Code COG Product
);

# Number of columns every feature line must have (one per field above).
our $NUM_COL = scalar keys %NAME_OF;
=head2 _initialize

 Title   : _initialize
 Usage   : $self->_initialize(%args)
 Function: Hands %args to the parent class initialiser. In read mode
           ('r') it then consumes the three PTT header lines:
           line 1 is stored with description(), the leading number of
           line 2 is stored with protein_count(), and line 3 (the
           column headers) is read and discarded. In any other mode
           nothing further is done.
 Args    : hash of named arguments, passed unchanged to
           SUPER::_initialize
 Returns : unspecified; do not rely on the return value
 Throws  : in read mode, if line 1 is missing or if line 2 does not
           start with "<number> proteins"

=cut

sub _initialize {
  my ($self, %arg) = @_;

  $self->SUPER::_initialize(%arg);

  if ($self->mode eq 'r') {
    # Line 1: free-text description of the sequence.
    my $desc = $self->_readline();
    defined $desc or $self->throw("Missing description line");
    chomp $desc;
    $desc =~ s/\r\z//;    # drop the CR left behind by CRLF line endings
    $self->description($desc);

    # Line 2: "<N> proteins". Guard against end of input before matching.
    my $line = $self->_readline();
    my ($count) = defined($line) ? ($line =~ m/^(\d+) proteins/) : ();
    defined $count or $self->throw("Invalid protein count");
    $self->protein_count($count);

    # Line 3: column headers. The column layout is fixed, so the line is
    # consumed only to position the stream at the first feature line.
    $self->_readline();
  }
}
=head2 next_feature

 Title   : next_feature
 Usage   : my $feature = $io->next_feature()
 Function: Read the next feature line from the PTT input and turn it
           into a feature object. Column 1 ("begin..end") supplies
           start and end, column 2 ("+" or "-") supplies the strand
           (+1 or -1), and every remaining column whose value is not
           "-" is attached as a tag named after the column (see
           %NAME_OF). The primary tag is always 'CDS'.
 Example : while (my $f = $io->next_feature) { ... }
 Args    : none
 Returns : a Bio::SeqFeature::Generic object, or nothing when the
           stream is not in read mode or the input is exhausted
 Throws  : if the line does not have exactly $NUM_COL columns, or if
           the location or strand column is malformed

=cut

sub next_feature {
  my $self = shift;

  # Features can only be read from a stream opened for reading.
  return unless $self->mode eq 'r';

  my $line = $self->_readline() or return;    # no more lines: end of input
  chomp $line;
  $line =~ s/\r\z//;    # keep a CRLF line ending out of the last column

  # Note: split drops trailing empty fields, so a line whose final
  # column is empty is reported as having too few columns.
  my @col = split m/\t/, $line;
  @col == $NUM_COL
    or $self->throw("Expected $NUM_COL columns in PTT line but found " . scalar(@col));

  # Capture into lexicals right away instead of relying on $1/$2
  # surviving until they are used.
  my ($start, $end) = $col[0] =~ m/(\d+)\.\.(\d+)/
    or $self->throw("Invalid location (column 1)");
  my ($sign) = $col[1] =~ m/^([+-])$/
    or $self->throw("Invalid strand (column 2)");

  my $feat = Bio::SeqFeature::Generic->new(
    -start   => $start,
    -end     => $end,
    -primary => 'CDS',
  );
  $feat->strand($sign eq '+' ? +1 : -1);

  # Remaining columns become tags; "-" marks an empty field.
  for my $i (2 .. $NUM_COL - 1) {
    next if $col[$i] eq '-';
    $feat->add_tag_value($NAME_OF{$i}, $col[$i]);
  }

  return $feat;
}
=head2 write_feature (NOT IMPLEMENTED)

 Title   : write_feature
 Usage   : $io->write_feature($feature)
 Function: Placeholder for writing a Bio::SeqFeatureI object in PTT
           format. It currently does nothing except call
           throw_not_implemented() on the object.
 Example :
 Args    : Bio::SeqFeatureI object (currently ignored)
 Returns : whatever throw_not_implemented() returns, if it returns

=cut

sub write_feature {
  my ($self) = @_;
  return $self->throw_not_implemented;
}
=head2 description

 Title   : description
 Usage   : my $text = $obj->description();
           $obj->description($newval);
 Function: Combined getter/setter for the PTT description, i.e. the
           text of line one of the file. The value is kept in the
           object under the 'description' key.
 Example :
 Returns : the current description (after any update)
 Args    : optional new value (a scalar or undef); when an argument
           is given, even undef, it replaces the stored value

=cut

sub description {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'description'} = $new_value[0];
  }

  return $self->{'description'};
}
=head2 protein_count

 Title   : protein_count
 Usage   : my $n = $obj->protein_count();
           $obj->protein_count($newval);
 Function: Combined getter/setter for the protein count announced on
           line two of the PTT file. The value is kept in the object
           under the 'protein_count' key.
 Example :
 Args    : optional new value (a scalar or undef); when an argument
           is given, even undef, it replaces the stored value
 Returns : the current protein count (after any update)

=cut

sub protein_count {
  my $self = shift;

  # Any remaining argument, including an explicit undef, is a new value.
  $self->{'protein_count'} = $_[0] if scalar @_;

  return $self->{'protein_count'};
}