tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Matrix / PSM / IO / transfac.pm
blob72a9900af89f6ea036ae50f68d656ff36571fb09
1 #---------------------------------------------------------
2 # $Id$
4 =head1 NAME
6 Bio::Matrix::PSM::IO::transfac - PSM transfac parser
8 =head1 SYNOPSIS
10 See Bio::Matrix::PSM::IO for documentation
12 =head1 DESCRIPTION
16 =head1 FEEDBACK
18 =head2 Mailing Lists
20 User feedback is an integral part of the evolution of this and other
21 Bioperl modules. Send your comments and suggestions preferably to one
22 of the Bioperl mailing lists. Your participation is much appreciated.
24 bioperl-l@bioperl.org - General discussion
25 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
27 =head2 Support
29 Please direct usage questions or support issues to the mailing list:
31 I<bioperl-l@bioperl.org>
33 rather than to the module maintainer directly. Many experienced and
34 reponsive experts will be able look at the problem and quickly
35 address it. Please include a thorough description of the problem
36 with code and data examples if at all possible.
38 =head2 Reporting Bugs
40 Report bugs to the Bioperl bug tracking system to help us keep track
41 the bugs and their resolution. Bug reports can be submitted via the
42 web:
44 http://bugzilla.open-bio.org/
46 =head1 AUTHOR - Stefan Kirov
48 Email skirov@utk.edu
50 =head1 APPENDIX
52 =cut
55 # Let the code begin...
56 package Bio::Matrix::PSM::IO::transfac;
57 use Bio::Matrix::PSM::Psm;
58 use Bio::Root::Root;
59 use Bio::Annotation::Reference;
60 use Bio::Annotation::Comment;
61 use Bio::Annotation::DBLink;
62 use strict;
64 use base qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
66 =head2 new
68 Title : new
69 Usage : my $psmIO = Bio::Matrix::PSM::IO->new(-format=>'transfac',
70 -file=>$file);
71 Function: Associates a file with the appropriate parser
72 Throws :
73 Example :
74 Args :
75 Returns : "Bio::Matrix::PSM::$format"->new(@args);
77 =cut
79 sub new {
80 my ($class,@args)=@_;
81 my $line;
82 my $self = $class->SUPER::new(@args);
83 my ($file)=$self->_rearrange(['FILE'], @args);
84 $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; #Read only for now
85 #Remove header
86 do {
87 $line=$self->_readline;
88 chomp $line;
89 push @{$self->{unstructured}},$line if (length($line)>2); } until ($line =~ m{^//}) || (!defined($line)); #Unstructured header
90 $self->_initialize;
91 return $self;
95 =head2 next_psm
97 Title : next_psm
98 Usage : my $psm=$psmIO->next_psm();
99 Function: Reads the next PSM from the input file, associated with this object
100 Throws : Upon finding a line, defining the matrix, where one or more positions
101 are not defined, see _make_matrix
102 Returns : Bio::Matrix::PSM::Psm object
103 Args : none
105 =cut
107 sub next_psm {
108 my $self=shift;
109 my $line;
110 return if ($self->{end});
111 my (@a,@c,@g,@t, $id, $tr1, @refs,$accn, $bf, $sites);
112 my $i=0;
113 while (defined( $line=$self->_readline)) {
114 chomp($line);
115 if ($line=~/^\d{2}/) { #Begining of the frequency data
116 ($a[$i],$c[$i],$g[$i],$t[$i])=_parse_matrix($line);
117 $i++;
119 ($tr1,$accn)=split(/\s{2}/,$line) if ($line=~/^AC\s/);
120 ($tr1,$bf)=split(/\s{2}/,$line) if ($line=~/^BF\s/);
121 ($tr1,$id)=split(/\s{2}/,$line) if ($line=~/^ID\s/);
122 last if (($line=~/^XX/) && ($i>0));
124 if (!(defined($id) && defined($accn))) {
125 $self->{end}=1;
126 return;
128 while (defined( $line=$self->_readline)) { #How many sites?
129 if ($line=~/^BA\s/) {
130 my ($tr1,$ba)=split(/\s{2}/,$line);
131 ($sites)=split(/\s/,$ba);
133 if ($line=~/^RN/) { #Adding a reference as Bio::Annotation object (self)
134 # not interested in RN line itself, since has only transfac-specific
135 # reference id? - no push back of line
136 my $ref=_parse_ref($self);
137 push @refs,$ref
139 last if ($line=~m{^//});
141 # We have the frequencies, let's create a SiteMatrix object
142 my %matrix = &_make_matrix($self,\@a,\@c,\@g,\@t,$id, $accn);
143 $matrix{-sites}=$sites if ($sites);
144 $matrix{-width}=@a;
145 my $psm=Bio::Matrix::PSM::Psm->new(%matrix);
146 foreach my $ref (@refs) { $psm->add_Annotation('reference',$ref); }
147 return $psm;
150 =head2 _parseMatrix
152 Title : _parseMatrix
153 Usage :
154 Function: Parses a line
155 Throws :
156 Example : Internal stuff
157 Returns : array (frequencies for A,C,G,T in this order).
158 Args : string
160 =cut
162 sub _parse_matrix {
163 my $line=shift;
164 $line=~s/\s+/,/g;
165 my ($tr,$a,$c,$g,$t)=split(/,/,$line);
166 return $a,$c,$g,$t;
170 =head2 _make_matrix
172 Title : _make_matrix
173 Usage :
174 Function:
175 Throws : If a position is undefined, for example if you have line like this
176 in the file you are parsing: 08 4,7,,9
177 Example : Internal stuff
178 Returns :
179 Args :
181 =cut
183 sub _make_matrix {
184 my ($a, $c, $g, $t, @fa, @fc,@fg, @ft, @a,@c,@g,@t);
185 my $ave=0;
186 my ($self,$cA,$cC,$cG,$cT, $id, $accn)= @_;
188 for (my $i=0; $i < @{$cA};$i++) {
189 #No value can be undefined -throw an exception, since setting to 0 probably would be wrong
190 #If this happens it would indicate most probably that the file, being parsed is in a different format
191 map { $self->throw('Parsing error, a position is not defined') unless defined(${$_}[$i]) } ($cA, $cG, $cC, $cT);
193 if ( (${$cA}[$i] + ${$cC}[$i] +
194 ${$cG}[$i] + ${$cT}[$i] ) ==0 ) {
195 push @a,$ave;
196 push @c,$ave;
197 push @g,$ave;
198 push @t,$ave;
200 else {
201 push @a,${$cA}[$i];
202 push @c,${$cC}[$i];
203 push @g,${$cG}[$i];
204 push @t,${$cT}[$i];
205 $ave = ((${$cA}[$i]+${$cC}[$i]+
206 ${$cG}[$i]+${$cT}[$i]) / 4 +$ave)/2;
210 for (my $i=0; $i<@a;$i++) {
211 my $zero=($a[$i]+$c[$i]+$g[$i]+$t[$i]);
212 next if ($zero==0);
213 push @fa, $a[$i];
214 push @fc, $c[$i];
215 push @fg, $g[$i];
216 push @ft, $t[$i];
218 return (-pA=>\@fa,-pC=>\@fc,-pG=>\@fg,-pT=>\@ft, -id=>$id, -accession_number=>$accn)
221 sub _parse_ref {
222 my $self=shift;
223 my ($authors,$title,$loc,@refs,$tr,$db,$dbid);
224 while (my $refline=$self->_readline) { #Poorely designed, should go through an array with fields
225 chomp $refline;
226 my ($field,$arg)=split(/\s+/,$refline,2);
227 last if ($field=~/XX/);
228 $field.=' ';
229 REF: {
230 if ($field=~/RX/) { #DB Reference
231 $refline=~s/[;\.]//g;
232 ($tr, $db, $dbid)=split(/\s+/,$refline);
233 last REF;
235 if ($field=~/RT/) { #Title
236 $title .= $arg;
237 last REF;
239 if ($field=~/RA/) { #Author
240 $authors .= $arg;
241 last REF;
243 if ($field=~/RL/) { #Journal
244 $loc .= $arg;
245 last REF;
249 my $reference=Bio::Annotation::Reference->new(-authors=>$authors, -title=>$title,
250 -location=>$loc);
251 if ($db eq 'MEDLINE') {
252 # does it ever equal medline?
253 $reference->medline($dbid);
255 elsif ($dbid) {
256 $reference->pubmed($dbid);
258 return $reference;
261 sub DESTROY {
262 my $self=shift;
263 $self->close;