1 #---------------------------------------------------------
6 Bio::Matrix::PSM::IO::transfac - PSM transfac parser
10 See Bio::Matrix::PSM::IO for documentation
20 User feedback is an integral part of the evolution of this and other
21 Bioperl modules. Send your comments and suggestions preferably to one
22 of the Bioperl mailing lists. Your participation is much appreciated.
24 bioperl-l@bioperl.org - General discussion
25 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
29 Please direct usage questions or support issues to the mailing list:
31 I<bioperl-l@bioperl.org>
33 rather than to the module maintainer directly. Many experienced and
34 reponsive experts will be able look at the problem and quickly
35 address it. Please include a thorough description of the problem
36 with code and data examples if at all possible.
40 Report bugs to the Bioperl bug tracking system to help us keep track
41 the bugs and their resolution. Bug reports can be submitted via the
44 http://bugzilla.open-bio.org/
46 =head1 AUTHOR - Stefan Kirov
55 # Let the code begin...
56 package Bio
::Matrix
::PSM
::IO
::transfac
;
57 use Bio
::Matrix
::PSM
::Psm
;
59 use Bio
::Annotation
::Reference
;
60 use Bio
::Annotation
::Comment
;
61 use Bio
::Annotation
::DBLink
;
64 use base
qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
69 Usage : my $psmIO = Bio::Matrix::PSM::IO->new(-format=>'transfac',
71 Function: Associates a file with the appropriate parser
75 Returns : "Bio::Matrix::PSM::$format"->new(@args);
82 my $self = $class->SUPER::new
(@args);
83 my ($file)=$self->_rearrange(['FILE'], @args);
84 $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; #Read only for now
87 $line=$self->_readline;
89 push @
{$self->{unstructured
}},$line if (length($line)>2); } until ($line =~ m{^//}) || (!defined($line)); #Unstructured header
98 Usage : my $psm=$psmIO->next_psm();
99 Function: Reads the next PSM from the input file, associated with this object
100 Throws : Upon finding a line, defining the matrix, where one or more positions
101 are not defined, see _make_matrix
102 Returns : Bio::Matrix::PSM::Psm object
110 return if ($self->{end
});
111 my (@a,@c,@g,@t, $id, $tr1, @refs,$accn, $bf, $sites);
113 while (defined( $line=$self->_readline)) {
115 if ($line=~/^\d{2}/) { #Begining of the frequency data
116 ($a[$i],$c[$i],$g[$i],$t[$i])=_parse_matrix
($line);
119 ($tr1,$accn)=split(/\s{2}/,$line) if ($line=~/^AC\s/);
120 ($tr1,$bf)=split(/\s{2}/,$line) if ($line=~/^BF\s/);
121 ($tr1,$id)=split(/\s{2}/,$line) if ($line=~/^ID\s/);
122 last if (($line=~/^XX/) && ($i>0));
124 if (!(defined($id) && defined($accn))) {
128 while (defined( $line=$self->_readline)) { #How many sites?
129 if ($line=~/^BA\s/) {
130 my ($tr1,$ba)=split(/\s{2}/,$line);
131 ($sites)=split(/\s/,$ba);
133 if ($line=~/^RN/) { #Adding a reference as Bio::Annotation object (self)
134 # not interested in RN line itself, since has only transfac-specific
135 # reference id? - no push back of line
136 my $ref=_parse_ref
($self);
139 last if ($line=~m{^//});
141 # We have the frequencies, let's create a SiteMatrix object
142 my %matrix = &_make_matrix
($self,\
@a,\
@c,\
@g,\
@t,$id, $accn);
143 $matrix{-sites
}=$sites if ($sites);
145 my $psm=Bio
::Matrix
::PSM
::Psm
->new(%matrix);
146 foreach my $ref (@refs) { $psm->add_Annotation('reference',$ref); }
154 Function: Parses a line
156 Example : Internal stuff
157 Returns : array (frequencies for A,C,G,T in this order).
165 my ($tr,$a,$c,$g,$t)=split(/,/,$line);
175 Throws : If a position is undefined, for example if you have line like this
176 in the file you are parsing: 08 4,7,,9
177 Example : Internal stuff
184 my ($a, $c, $g, $t, @fa, @fc,@fg, @ft, @a,@c,@g,@t);
186 my ($self,$cA,$cC,$cG,$cT, $id, $accn)= @_;
188 for (my $i=0; $i < @
{$cA};$i++) {
189 #No value can be undefined -throw an exception, since setting to 0 probably would be wrong
190 #If this happens it would indicate most probably that the file, being parsed is in a different format
191 map { $self->throw('Parsing error, a position is not defined') unless defined(${$_}[$i]) } ($cA, $cG, $cC, $cT);
193 if ( (${$cA}[$i] + ${$cC}[$i] +
194 ${$cG}[$i] + ${$cT}[$i] ) ==0 ) {
205 $ave = ((${$cA}[$i]+${$cC}[$i]+
206 ${$cG}[$i]+${$cT}[$i]) / 4 +$ave)/2;
210 for (my $i=0; $i<@a;$i++) {
211 my $zero=($a[$i]+$c[$i]+$g[$i]+$t[$i]);
218 return (-pA
=>\
@fa,-pC
=>\
@fc,-pG
=>\
@fg,-pT
=>\
@ft, -id
=>$id, -accession_number
=>$accn)
223 my ($authors,$title,$loc,@refs,$tr,$db,$dbid);
224 while (my $refline=$self->_readline) { #Poorely designed, should go through an array with fields
226 my ($field,$arg)=split(/\s+/,$refline,2);
227 last if ($field=~/XX/);
230 if ($field=~/RX/) { #DB Reference
231 $refline=~s/[;\.]//g;
232 ($tr, $db, $dbid)=split(/\s+/,$refline);
235 if ($field=~/RT/) { #Title
239 if ($field=~/RA/) { #Author
243 if ($field=~/RL/) { #Journal
249 my $reference=Bio
::Annotation
::Reference
->new(-authors
=>$authors, -title
=>$title,
251 if ($db eq 'MEDLINE') {
252 # does it ever equal medline?
253 $reference->medline($dbid);
256 $reference->pubmed($dbid);