add skips and a warning re: NeXML v0.9 support
[bioperl-live.git] / Bio / Draw / Pictogram.pm
blob1a2c9b86e37d660f6c3ab142fb0f1d2987692568
1 # BioPerl module for Bio::Draw::Pictogram
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
7 # Copyright Shawn Hoon
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Draw::Pictogram - generate SVG output of Pictogram display for consensus motifs
17 =head1 SYNOPSIS
19 use Bio::Draw::Pictogram;
20 use Bio::SeqIO;
22 my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
23 my @seq;
24 while(my $seq = $sio->next_seq){
25 push @seq, $seq;
28 my $picto = Bio::Draw::Pictogram->new(-width=>"800",
29 -height=>"500",
30 -fontsize=>"60",
31 -plot_bits=>1,
32 -background=>{
33 'A'=>0.25,
34 'C'=>0.18,
35 'T'=>0.32,
36 'G'=>0.25},
37 -color=>{'A'=>'red',
38 'G'=>'blue',
39 'C'=>'green',
40 'T'=>'magenta'});
42 my $svg = $picto->make_svg(\@seq);
44 print $svg->xmlify."\n";
46 #Support for Bio::Matrix::PSM::SiteMatrix now included
48 use Bio::Matrix::PSM::IO;
50 my $picto = Bio::Draw::Pictogram->new(-width=>"800",
51 -height=>"500",
52 -fontsize=>"60",
53 -plot_bits=>1,
54 -background=>{
55 'A'=>0.25,
56 'C'=>0.18,
57 'T'=>0.32,
58 'G'=>0.25},
59 -color=>{'A'=>'red',
60 'G'=>'blue',
61 'C'=>'green',
62 'T'=>'magenta'});
64 my $psm = $psmIO->next_psm;
65 my $svg = $picto->make_svg($psm);
66 print $svg->xmlify;
68 =head1 DESCRIPTION
70 A module for generating SVG output of Pictogram display for consensus
71 motifs. This method of representation was describe by Burge and
72 colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
73 525-560, CSHL press, 1999)
75 This is a simple module that takes in an array of sequences (assuming
76 equal lengths) and calculates relative base frequencies where the
77 height of each letter reflects the frequency of each nucleotide at a
78 given position. It can also plot the information content at each
79 position scaled by the background frequencies of each nucleotide.
81 It requires the SVG-2.26 or later module by Ronan Oger available at
82 http://www.cpan.org
84 Recommended viewing of the SVG is the plugin available at Adobe:
85 http://www.adobe.com/svg
87 =head1 FEEDBACK
90 =head2 Mailing Lists
92 User feedback is an integral part of the evolution of this and other
93 Bioperl modules. Send your comments and suggestions preferably to one
94 of the Bioperl mailing lists. Your participation is much appreciated.
96 bioperl-l@bioperl.org - General discussion
97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99 =head2 Support
101 Please direct usage questions or support issues to the mailing list:
103 I<bioperl-l@bioperl.org>
105 rather than to the module maintainer directly. Many experienced and
106 reponsive experts will be able look at the problem and quickly
107 address it. Please include a thorough description of the problem
108 with code and data examples if at all possible.
110 =head2 Reporting Bugs
112 Report bugs to the Bioperl bug tracking system to help us keep track
113 the bugs and their resolution. Bug reports can be submitted via the
114 web:
116 https://redmine.open-bio.org/projects/bioperl/
118 =head1 AUTHOR - Shawn Hoon
120 Email shawnh@fugu-sg.org
122 =head1 APPENDIX
124 The rest of the documentation details each of the object
125 methods. Internal methods are usually preceded with a "_".
127 =cut
129 package Bio::Draw::Pictogram;
130 use strict;
131 use SVG 2.26;
132 use Bio::SeqIO;
133 use base qw(Bio::Root::Root);
135 use constant MAXBITS => 2;
137 =head2 new
139 Title : new
140 Usage : my $picto = Bio::Draw::Pictogram->new(-width=>"800",
141 -height=>"500",
142 -fontsize=>"60",
143 -plot_bits=>1,
144 -background=>{
145 'A'=>0.25,
146 'C'=>0.18,
147 'T'=>0.32,
148 'G'=>0.25},
149 -color=>{'A'=>'red',
150 'G'=>'blue',
151 'C'=>'green',
152 'T'=>'magenta'});
153 Function: Constructor for Pictogram Object
154 Returns : L<Bio::Draw::Pictogram>
156 =cut
158 sub new {
159 my ($caller,@args) = @_;
160 my $self = $caller->SUPER::new(@args);
161 my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
162 $width||=800;
163 $height||=600;
164 my $svg = SVG->new(width=>$width,height=>$height);
165 $self->svg_obj($svg);
166 $fontsize ||= 80;
167 $self->fontsize($fontsize) if $fontsize;
168 $color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
169 $self->color($color);
170 $background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
171 $self->background($background);
172 $self->plot_bits($bit) if $bit;
173 $self->normalize($normalize) if $normalize;
175 return $self;
178 =head2 make_svg
180 Title : make_svg
181 Usage : $picto->make_svg();
182 Function: make the SVG object
183 Returns : L<SVG>
184 Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI>
186 =cut
188 sub make_svg {
189 my ($self,$input) = @_;
190 my $fontsize = $self->fontsize;
191 my $size = $fontsize * 0.75;
192 my $width= $size;
193 my $height= $size+40;
194 my $color = $self->color;
196 #starting x coordinate for pictogram
197 my $x = 45+$size/2;
198 my $pos_y = $size * 2;
199 my $bit_y = $pos_y+40;
200 my @pwm;
202 my $bp = 1;
204 #input can be file or array ref of sequences
205 if(ref($input) eq 'ARRAY'){
206 @pwm = @{$self->_make_pwm($input)};
208 elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
209 @pwm = $self->_make_pwm_from_site_matrix($input);
211 else {
212 my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
213 my @seq;
214 while (my $seq = $sio->next_seq){
215 push @seq, $seq;
217 @pwm = @{$self->_make_pwm(\@seq)};
221 my $svg = $self->svg_obj;
222 my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
223 my $seq_grp;
225 #scale the svg if length greater than svg width
226 if($seq_length > $svg->{-document}->{'width'}){
227 my $ratio = $svg->{-document}->{'width'}/($seq_length);
228 $seq_grp = $svg->group(transform=>"scale($ratio,1)");
230 else {
231 $seq_grp= $svg->group();
234 #do the drawing, each set is a base position
235 foreach my $set(@pwm){
236 my ($A,$C,$G,$T,$bits) = @$set;
237 my @array;
238 push @array, ['a',($A)];
239 push @array, ['g',($G)];
240 push @array, ['c',($C)];
241 push @array, ['t',($T)];
242 @array = sort {$b->[1]<=>$a->[1]}@array;
243 my $count = 1;
244 my $pos_group = $seq_grp->group(id=>"bp $bp");
245 my $prev_size;
246 my $y_trans;
248 #draw each letter at each position
249 foreach my $letter(@array){
250 my $scale;
251 if($self->normalize){
252 $scale = $letter->[1];
253 } else {
254 $scale = $letter->[1] * ($bits / MAXBITS);
257 if($count == 1){
258 if($self->normalize){
259 $y_trans = 0;
260 } else {
261 $y_trans = (1 - ($bits / MAXBITS)) * $size;
264 else {
265 $y_trans += $prev_size;
267 $pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
268 'width'=>$width,x=>$x,y=>$size,
269 'transform'=>"translate(0,$y_trans),scale(1,$scale)",
270 'style'=>{"font-size"=>$fontsize,
271 'text-anchor'=>'middle',
272 'font-family'=>'Verdana',
273 'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
275 $prev_size = $scale * $size;
276 $count++;
278 #plot the bit if required
279 if($self->plot_bits){
280 $seq_grp->text('x'=>$x,
281 'y'=>$bit_y,
282 'style'=>{"font-size"=>'10',
283 'text-anchor'=>'middle',
284 'font-family'=>'Verdana',
285 'fill'=>'black'})->cdata($bits);
287 $bp++;
288 $x+=$width;
291 #plot the tags
292 $seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
294 $seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
296 #plot the base positions
297 $x = 45+$size/2-int($width/2);
298 foreach my $nbr(1..($bp-1)){
299 $seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
300 $x+=$width;
304 # $seq_grp->transform("scale(2,2)");
306 return $self->svg_obj($svg);
309 sub _make_pwm_from_site_matrix{
310 my ($self,$matrix) = @_;
311 my $bgd = $self->background;
312 my @pwm;
313 my $consensus = $matrix->consensus;
314 foreach my $i(1..length($consensus)){
315 my %base = $matrix->next_pos;
316 my $bits;
317 $bits+=($base{pA} * log2($base{pA}/$bgd->{'A'}));
318 $bits+=($base{pC} * log2($base{pC}/$bgd->{'C'}));
319 $bits+=($base{pG} * log2($base{pG}/$bgd->{'G'}));
320 $bits+=($base{pT} * log2($base{pT}/$bgd->{'T'}));
321 push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))];
323 return @pwm;
326 sub _make_pwm {
327 my ($self,$input) = @_;
328 my $count = 1;
329 my %hash;
330 my $bgd = $self->background;
331 #sum up the frequencies at each base pair
332 foreach my $seq(@$input){
333 my $string = $seq->seq;
334 $string = uc $string;
335 my @motif = split('',$string);
336 my $pos = 1;
337 foreach my $t(@motif){
338 $hash{$pos}{$t}++;
339 $pos++;
341 $count++;
344 #calculate relative freq
345 my @pwm;
347 #decrement last count
348 $count--;
349 foreach my $pos(sort{$a<=>$b} keys %hash){
350 my @array;
351 push @array,($hash{$pos}{'A'}||0)/$count;
352 push @array,($hash{$pos}{'C'}||0)/$count;
353 push @array,($hash{$pos}{'G'}||0)/$count;
354 push @array,($hash{$pos}{'T'}||0)/$count;
356 #calculate bits
357 # relative entropy (RelEnt) or Kullback-Liebler distance
358 # relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
359 # gk the background frequency of nucleotide k
361 my $bits;
362 $bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
363 $bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
364 $bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
365 $bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
366 push @array, abs(sprintf("%.3f",$bits));
368 push @pwm,\@array;
370 return $self->pwm(\@pwm);
374 ###various get/sets
376 =head2 fontsize
378 Title : fontsize
379 Usage : $picto->fontsize();
380 Function: get/set for fontsize
381 Returns : int
382 Arguments: int
384 =cut
386 sub fontsize {
387 my ($self,$obj) = @_;
388 if($obj){
389 $self->{'_fontsize'} = $obj;
391 return $self->{'_fontsize'};
394 =head2 color
396 Title : color
397 Usage : $picto->color();
398 Function: get/set for color
399 Returns : a hash reference
400 Arguments: a hash reference
402 =cut
404 sub color {
405 my ($self,$obj) = @_;
406 if($obj){
407 $self->{'_color'} = $obj;
409 return $self->{'_color'};
412 =head2 svg_obj
414 Title : svg_obj
415 Usage : $picto->svg_obj();
416 Function: get/set for svg_obj
417 Returns : L<SVG>
418 Arguments: L<SVG>
420 =cut
422 sub svg_obj {
423 my ($self,$obj) = @_;
424 if($obj){
425 $self->{'_svg_obj'} = $obj;
427 return $self->{'_svg_obj'};
430 =head2 plot_bits
432 Title : plot_bits
433 Usage : $picto->plot_bits();
434 Function: get/set for plot_bits to indicate whether to plot
435 information content at each base position
436 Returns :1/0
437 Arguments: 1/0
439 =cut
441 sub plot_bits {
442 my ($self,$obj) = @_;
443 if($obj){
444 $self->{'_plot_bits'} = $obj;
446 return $self->{'_plot_bits'};
449 =head2 normalize
451 Title : normalize
452 Usage : $picto->normalize($newval)
453 Function: get/set to make all columns the same height.
454 default is to scale height with information
455 content.
456 Returns : value of normalize (a scalar)
457 Args : on set, new value (a scalar or undef, optional)
460 =cut
462 sub normalize{
463 my $self = shift;
465 return $self->{'normalize'} = shift if @_;
466 return $self->{'normalize'};
469 =head2 background
471 Title : background
472 Usage : $picto->background();
473 Function: get/set for hash reference of nucleodtide bgd frequencies
474 Returns : hash reference
475 Arguments: hash reference
477 =cut
479 sub background {
480 my ($self,$obj) = @_;
481 if($obj){
482 $self->{'_background'} = $obj;
484 return $self->{'_background'};
487 =head2 pwm
489 Title : pwm
490 Usage : $picto->pwm();
491 Function: get/set for pwm
492 Returns : int
493 Arguments: int
495 =cut
497 sub pwm {
498 my ($self,$pwm) = @_;
499 if($pwm){
500 $self->{'_pwm'} = $pwm;
502 return $self->{'_pwm'};
505 #utility method for returning log 2
506 sub log2 {
507 my ($val) = @_;
508 return 0 if $val==0;
509 return log($val)/log(2);