1 # BioPerl module for Bio::Draw::Pictogram
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Draw::Pictogram - generate SVG output of Pictogram display for consensus motifs
19 use Bio::Draw::Pictogram;
22 my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
24 while(my $seq = $sio->next_seq){
28 my $picto = Bio::Draw::Pictogram->new(-width=>"800",
42 my $svg = $picto->make_svg(\@seq);
44 print $svg->xmlify."\n";
46 #Support for Bio::Matrix::PSM::SiteMatrix now included
48 use Bio::Matrix::PSM::IO;
50 my $picto = Bio::Draw::Pictogram->new(-width=>"800",
64 my $psm = $psmIO->next_psm;
65 my $svg = $picto->make_svg($psm);
70 A module for generating SVG output of Pictogram display for consensus
71 motifs. This method of representation was describe by Burge and
72 colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
73 525-560, CSHL press, 1999)
75 This is a simple module that takes in an array of sequences (assuming
76 equal lengths) and calculates relative base frequencies where the
77 height of each letter reflects the frequency of each nucleotide at a
78 given position. It can also plot the information content at each
79 position scaled by the background frequencies of each nucleotide.
81 It requires the SVG-2.26 or later module by Ronan Oger available at
84 Recommended viewing of the SVG is the plugin available at Adobe:
85 http://www.adobe.com/svg
92 User feedback is an integral part of the evolution of this and other
93 Bioperl modules. Send your comments and suggestions preferably to one
94 of the Bioperl mailing lists. Your participation is much appreciated.
96 bioperl-l@bioperl.org - General discussion
97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
101 Please direct usage questions or support issues to the mailing list:
103 I<bioperl-l@bioperl.org>
105 rather than to the module maintainer directly. Many experienced and
106 reponsive experts will be able look at the problem and quickly
107 address it. Please include a thorough description of the problem
108 with code and data examples if at all possible.
110 =head2 Reporting Bugs
112 Report bugs to the Bioperl bug tracking system to help us keep track
113 the bugs and their resolution. Bug reports can be submitted via the
116 https://redmine.open-bio.org/projects/bioperl/
118 =head1 AUTHOR - Shawn Hoon
120 Email shawnh@fugu-sg.org
124 The rest of the documentation details each of the object
125 methods. Internal methods are usually preceded with a "_".
129 package Bio
::Draw
::Pictogram
;
133 use base
qw(Bio::Root::Root);
135 use constant MAXBITS
=> 2;
140 Usage : my $picto = Bio::Draw::Pictogram->new(-width=>"800",
153 Function: Constructor for Pictogram Object
154 Returns : L<Bio::Draw::Pictogram>
159 my ($caller,@args) = @_;
160 my $self = $caller->SUPER::new
(@args);
161 my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
164 my $svg = SVG
->new(width
=>$width,height
=>$height);
165 $self->svg_obj($svg);
167 $self->fontsize($fontsize) if $fontsize;
168 $color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
169 $self->color($color);
170 $background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
171 $self->background($background);
172 $self->plot_bits($bit) if $bit;
173 $self->normalize($normalize) if $normalize;
181 Usage : $picto->make_svg();
182 Function: make the SVG object
184 Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI>
189 my ($self,$input) = @_;
190 my $fontsize = $self->fontsize;
191 my $size = $fontsize * 0.75;
193 my $height= $size+40;
194 my $color = $self->color;
196 #starting x coordinate for pictogram
198 my $pos_y = $size * 2;
199 my $bit_y = $pos_y+40;
204 #input can be file or array ref of sequences
205 if(ref($input) eq 'ARRAY'){
206 @pwm = @
{$self->_make_pwm($input)};
208 elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
209 @pwm = $self->_make_pwm_from_site_matrix($input);
212 my $sio = Bio
::SeqIO
->new(-file
=>$input,-format
=>"fasta");
214 while (my $seq = $sio->next_seq){
217 @pwm = @
{$self->_make_pwm(\
@seq)};
221 my $svg = $self->svg_obj;
222 my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
225 #scale the svg if length greater than svg width
226 if($seq_length > $svg->{-document
}->{'width'}){
227 my $ratio = $svg->{-document
}->{'width'}/($seq_length);
228 $seq_grp = $svg->group(transform
=>"scale($ratio,1)");
231 $seq_grp= $svg->group();
234 #do the drawing, each set is a base position
235 foreach my $set(@pwm){
236 my ($A,$C,$G,$T,$bits) = @
$set;
238 push @array, ['a',($A)];
239 push @array, ['g',($G)];
240 push @array, ['c',($C)];
241 push @array, ['t',($T)];
242 @array = sort {$b->[1]<=>$a->[1]}@array;
244 my $pos_group = $seq_grp->group(id
=>"bp $bp");
248 #draw each letter at each position
249 foreach my $letter(@array){
251 if($self->normalize){
252 $scale = $letter->[1];
254 $scale = $letter->[1] * ($bits / MAXBITS
);
258 if($self->normalize){
261 $y_trans = (1 - ($bits / MAXBITS
)) * $size;
265 $y_trans += $prev_size;
267 $pos_group->text('id'=> uc($letter->[0]).$bp,height
=>$height,
268 'width'=>$width,x
=>$x,y
=>$size,
269 'transform'=>"translate(0,$y_trans),scale(1,$scale)",
270 'style'=>{"font-size"=>$fontsize,
271 'text-anchor'=>'middle',
272 'font-family'=>'Verdana',
273 'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
275 $prev_size = $scale * $size;
278 #plot the bit if required
279 if($self->plot_bits){
280 $seq_grp->text('x'=>$x,
282 'style'=>{"font-size"=>'10',
283 'text-anchor'=>'middle',
284 'font-family'=>'Verdana',
285 'fill'=>'black'})->cdata($bits);
292 $seq_grp->text(x
=>int($width/2),y
=>$bit_y,style
=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
294 $seq_grp->text(x
=>int($width/2),y
=>$pos_y,style
=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
296 #plot the base positions
297 $x = 45+$size/2-int($width/2);
298 foreach my $nbr(1..($bp-1)){
299 $seq_grp->text(x
=>$x+int($width/2),y
=>$pos_y,style
=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
304 # $seq_grp->transform("scale(2,2)");
306 return $self->svg_obj($svg);
309 sub _make_pwm_from_site_matrix
{
310 my ($self,$matrix) = @_;
311 my $bgd = $self->background;
313 my $consensus = $matrix->consensus;
314 foreach my $i(1..length($consensus)){
315 my %base = $matrix->next_pos;
317 $bits+=($base{pA
} * log2
($base{pA
}/$bgd->{'A'}));
318 $bits+=($base{pC
} * log2
($base{pC
}/$bgd->{'C'}));
319 $bits+=($base{pG
} * log2
($base{pG
}/$bgd->{'G'}));
320 $bits+=($base{pT
} * log2
($base{pT
}/$bgd->{'T'}));
321 push @pwm, [$base{pA
},$base{pC
},$base{pG
},$base{pT
},abs(sprintf("%.3f",$bits))];
327 my ($self,$input) = @_;
330 my $bgd = $self->background;
331 #sum up the frequencies at each base pair
332 foreach my $seq(@
$input){
333 my $string = $seq->seq;
334 $string = uc $string;
335 my @motif = split('',$string);
337 foreach my $t(@motif){
344 #calculate relative freq
347 #decrement last count
349 foreach my $pos(sort{$a<=>$b} keys %hash){
351 push @array,($hash{$pos}{'A'}||0)/$count;
352 push @array,($hash{$pos}{'C'}||0)/$count;
353 push @array,($hash{$pos}{'G'}||0)/$count;
354 push @array,($hash{$pos}{'T'}||0)/$count;
357 # relative entropy (RelEnt) or Kullback-Liebler distance
358 # relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
359 # gk the background frequency of nucleotide k
362 $bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
363 $bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
364 $bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
365 $bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
366 push @array, abs(sprintf("%.3f",$bits));
370 return $self->pwm(\
@pwm);
379 Usage : $picto->fontsize();
380 Function: get/set for fontsize
387 my ($self,$obj) = @_;
389 $self->{'_fontsize'} = $obj;
391 return $self->{'_fontsize'};
397 Usage : $picto->color();
398 Function: get/set for color
399 Returns : a hash reference
400 Arguments: a hash reference
405 my ($self,$obj) = @_;
407 $self->{'_color'} = $obj;
409 return $self->{'_color'};
415 Usage : $picto->svg_obj();
416 Function: get/set for svg_obj
423 my ($self,$obj) = @_;
425 $self->{'_svg_obj'} = $obj;
427 return $self->{'_svg_obj'};
433 Usage : $picto->plot_bits();
434 Function: get/set for plot_bits to indicate whether to plot
435 information content at each base position
442 my ($self,$obj) = @_;
444 $self->{'_plot_bits'} = $obj;
446 return $self->{'_plot_bits'};
452 Usage : $picto->normalize($newval)
453 Function: get/set to make all columns the same height.
454 default is to scale height with information
456 Returns : value of normalize (a scalar)
457 Args : on set, new value (a scalar or undef, optional)
465 return $self->{'normalize'} = shift if @_;
466 return $self->{'normalize'};
472 Usage : $picto->background();
473 Function: get/set for hash reference of nucleodtide bgd frequencies
474 Returns : hash reference
475 Arguments: hash reference
480 my ($self,$obj) = @_;
482 $self->{'_background'} = $obj;
484 return $self->{'_background'};
490 Usage : $picto->pwm();
491 Function: get/set for pwm
498 my ($self,$pwm) = @_;
500 $self->{'_pwm'} = $pwm;
502 return $self->{'_pwm'};
505 #utility method for returning log 2
509 return log($val)/log(2);