* sync with trunk
[bioperl-live.git] / Bio / CodonUsage / IO.pm
blob42fc462a53b03b0459867218f54c55afb8d3675f
1 # $Id$
3 # BioPerl module for Bio::CodonUsage::IO
5 # Cared for by Richard Adams (richard.adams@ed.ac.uk)
7 # Copyright Richard Adams
9 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::CodonUsage::IO - for reading and writing codon usage tables to file
16 =head1 SYNOPSIS
18 use Bio::CodonUsage::IO;
20 ## read in a codon usage file
21 my $io = Bio::CodonUsage::IO->new(-file => "in");
22 my $cut = $io->next_data();
24 ## write it out again
25 my $out = Bio::CodonUsage::IO->new(-file => ">out");
26 $out->write_data($cut);
28 =head1 DESCRIPTION
30 This class provides standard IO methods for reading and writing text files
31 of codon usage tables. These tables can initially be retrieved using
32 Bio::DB::CUTG. At present only this format is supported for read/write.
34 Reading a CUTG will return a Bio::CodonUsage::Table object.
36 =head1 SEE ALSO
38 L<Bio::Tools::CodonTable>,
39 L<Bio::WebAgent>,
40 L<Bio::CodonUsage::Table>,
41 L<Bio::CodonUsage::IO>
43 =head1 FEEDBACK
45 =head2 Mailing Lists
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to one
49 of the Bioperl mailing lists. Your participation is much appreciated.
51 bioperl-l@bioperl.org - General discussion
52 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 =head2 Reporting Bugs
56 Report bugs to the Bioperl bug tracking system to help us keep track
57 the bugs and their resolution. Bug reports can be submitted via the
58 web:
60 http://bugzilla.open-bio.org/
62 =head1 AUTHORS
64 Richard Adams, Richard.Adams@ed.ac.uk
66 =head1 APPENDIX
68 The rest of the documentation details each of the object
69 methods. Internal methods are usually preceded with a _
71 =cut
74 # Let the code begin
76 package Bio::CodonUsage::IO;
77 use Bio::CodonUsage::Table;
79 use base qw(Bio::Root::IO);
81 =head2 new
83 Title : new
84 Usage : my $io = Bio::CodonUsage::IO->new(-file => "CUTfile");
85 Purpose: To read/write a Bio:CodonUsage::Table object
86 Returns: A Bio:CodonUsage::IO object
87 Args : a file or file handle
89 =cut
91 sub new {
92 my ($class , @args) = @_;
93 my $self = $class->SUPER::new(@args);
97 =head2 next_data
99 Title : next_data
100 Usage : my $cut = $io->next_data();
101 Purpose: To obtain a Bio:CodonUsage::Table object
102 Returns: A Bio:CodonUsage::Table object
103 Args : none
105 =cut
107 sub next_data {
108 my $self = shift;
109 my $cut = $self->_parse;
110 return $cut;
113 =head2 write_data
115 Title : write_data
116 Usage : $io->write_data($cut);
117 Purpose: To write a CUT to file
118 Returns: void
119 Args : a Bio::CodonUsage::Table object reference
121 =cut
124 sub write_data {
125 my ($self, $cut) = @_;
126 if (!$cut || ! $cut->isa(Bio::CodonUsage::Table)) {
127 $self->throw("must supply a Bio::CodonUsage::Table object for writing\n");
129 my $outstring = "Codon usage table\n\n";
131 my $sp_string = $cut->species . "[" . $cut->_gb_db . "] " .
132 $cut->cds_count . " CDS's\n\n";
133 $outstring .= $sp_string;
134 my $colhead = sprintf("%-9s%-9s%15s%12s%12s\n\n", "AmAcid",
135 "Codon", "Number", "/1000", "Fraction");
136 $outstring .= $colhead;
138 ### now write bulk of codon data ##
139 my $ctable = Bio::Tools::CodonTable->new;
141 for my $f (qw(G A T C)) {
142 for my $s (qw(G A T C)) {
143 for my $t (qw(G A T C)) {
144 $cod = $f . $s . $t;
145 my $aa =$Bio::SeqUtils::THREECODE {$ctable->translate($cod)};
146 my $codstr = sprintf("%-9s%-9s%15.2f%12.2f%12.2f\n",
148 $aa, $cod, my $tt = $cut->codon_count($cod)|| 0.00,
149 my $ll =$cut->{'_table'}{$aa}{$cod}{'per1000'}|| 0.00,
150 my $ss = $cut->codon_rel_frequency($cod) || 0.00);
151 $outstring .= $codstr;
153 $outstring .= "\n";
156 $outstring .= "\n\n";
158 ## now append GC data
159 $outstring .= "Coding GC ". $cut->get_coding_gc('all'). "%\n";
160 $outstring .= "1st letter GC ". $cut->get_coding_gc(1). "%\n";
161 $outstring .= "2nd letter GC ". $cut->get_coding_gc(2). "%\n";
162 $outstring .= "3rd letter GC ". $cut->get_coding_gc(3). "%\n";
163 $outstring .= "Genetic code " . $cut->genetic_code() ."\n\n\n";
165 $self->_print ($outstring);
166 $self->flush();
170 sub _parse {
171 my $self = shift;
172 my $cdtableobj = Bio::CodonUsage::Table->new();
173 while (my $line = $self->_readline() ) {
174 next if $line =~ /^$/ ;
175 $line =~ s/End/Ter/;
176 ## now parse in species name, cds number
178 if ($line =~ /^(.+?)\s*\[(\w+)\].+?(\d+)/) {
179 $cdtableobj->species($1);
180 $cdtableobj->{'_gb_db'} = $2;
181 $cdtableobj->cds_count($3);
184 ## now parse in bulk of codon usage table
185 elsif ( $line =~ /^(\w\w\w)\s+(\w+)\s+(\d+\.\d+)
186 \s+(\d+\.\d+)\s+(\d+\.\d+)/x){
187 if (defined ($1)) {
188 $cdtableobj->{'_table'}{$1}{$2} = {
189 'abs_count'=>$3,
190 'per1000'=> $4,
191 'rel_freq'=> $5,
196 ## now parse in gc data ####
197 if($line =~ /^Cod.+?(\d\d\.\d\d)/ ){
198 $cdtableobj->{'_coding_gc'}{'all'} = $1;
200 elsif ($line =~ /^1st.+?(\d\d\.\d\d)/){
201 $cdtableobj->{'_coding_gc'}{'1'} = $1;
203 elsif($line =~ /^2nd.+?(\d\d\.\d\d)/){
204 $cdtableobj->{'_coding_gc'}{'2'} = $1;
206 elsif ($line =~ /^3rd.+?(\d\d\.\d\d)/){
207 $cdtableobj->{'_coding_gc'}{'3'} = $1;
210 elsif ($line =~ /^Gen.+?(\d+)/){
211 $cdtableobj->{'_genetic_code'} = $1;
214 ## check has been parsed ok ##
215 if (scalar keys %{$cdtableobj->{'_table'}} != 21) {
216 $cdtableobj->warn("probable parsing error - should be 21 entries for 20aa + stop codon");
218 return $cdtableobj;
224 __END__