3 # BioPerl module for IUPAC
5 # Cared for by Aaron Mackey <amackey@virginia.edu>
7 # Copyright Aaron Mackey
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Tools::IUPAC - Generates unique Seq objects from an ambiguous Seq object
20 use Bio::Tools::IUPAC;
22 my $ambiseq = Bio::Seq->new(-seq => 'ARTCGUTGR', -alphabet => 'dna');
23 my $stream = Bio::Tools::IUPAC->new(-seq => $ambiseq);
25 while ($uniqueseq = $stream->next_seq()) {
26 # process the unique Seq object.
31 IUPAC is a tool that produces a stream of unique, "strict"-satisfying Seq
32 objects from an ambiquous Seq object (containing non-standard characters given
33 the meaning shown below)
35 Extended DNA / RNA alphabet :
36 (includes symbols for nucleotide ambiguity)
37 ------------------------------------------
38 Symbol Meaning Nucleic Acid
39 ------------------------------------------
58 IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
59 Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
61 -----------------------------------
64 ------------------------------------------
66 ------------------------------------------
68 B Aspartic Acid, Asparagine
92 Z Glutamic Acid, Glutamine
96 IUPAC-IUP AMINO ACID SYMBOLS:
97 Biochem J. 1984 Apr 15; 219(2): 345-373
98 Eur J Biochem. 1993 Apr 1; 213(1): 2
104 User feedback is an integral part of the evolution of this and other
105 Bioperl modules. Send your comments and suggestions preferably to one
106 of the Bioperl mailing lists. Your participation is much appreciated.
108 bioperl-l@bioperl.org - General discussion
109 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
111 =head2 Reporting Bugs
113 Report bugs to the Bioperl bug tracking system to help us keep track
114 the bugs and their resolution. Bug reports can be submitted via the
117 http://bugzilla.open-bio.org/
119 =head1 AUTHOR - Aaron Mackey
121 Email amackey-at-virginia.edu
125 The rest of the documentation details each of the object
126 methods. Internal methods are usually preceded with a _
131 # Let the code begin...
133 package Bio
::Tools
::IUPAC
;
136 use vars
qw(%IUP %IUB %REV_IUB $AUTOLOAD);
139 %IUB = ( A => [qw(A)],
157 %REV_IUB = (A
=> 'A',
176 %IUP = (A
=> [qw(A)],
206 use base
qw(Bio::Root::Root);
211 Usage : Bio::Tools::IUPAC->new( $seq)
212 Function: returns a new seq stream (akin to SeqIO)
213 Returns : a Bio::Tools::IUPAC stream object that will produce unique
214 Seq objects on demand.
215 Args : an ambiguously coded Seq.pm object that has a specified 'alphabet'
222 my($class,@args) = @_;
223 my $self = $class->SUPER::new
(@args);
225 my ($seq) = $self->_rearrange([qw(SEQ)],@args);
226 if((! defined($seq)) && @args && ref($args[0])) {
227 # parameter not passed as named parameter?
230 $seq->isa('Bio::Seq') or
231 $self->throw("Must supply a Seq.pm object to IUPAC!");
232 $self->{'_SeqObj'} = $seq;
233 if ($self->{'_SeqObj'}->alphabet() =~ m/^[dr]na$/i ) {
234 # nucleotide seq object
235 $self->{'_alpha'} = [ map { $IUB{uc($_)} }
236 split('', $self->{'_SeqObj'}->seq()) ];
237 } elsif ($self->{'_SeqObj'}->alphabet() =~ m/^protein$/i ) {
238 # amino acid seq object
239 $self->{'_alpha'} = [ map { $IUP{uc($_)} }
240 split('', $self->{'_SeqObj'}->seq()) ];
241 } else { # unknown type: we could make a guess, but let's not.
242 $self->throw("You must specify the 'type' of sequence provided to IUPAC");
244 $self->{'_string'} = [(0) x
length($self->{'_SeqObj'}->seq())];
245 scalar @
{$self->{'_string'}} or $self->throw("Sequence has zero-length!");
246 $self->{'_string'}->[0] = -1;
253 Usage : $iupac->next_seq()
254 Function: returns the next unique Seq object
255 Returns : a Seq.pm object
264 for my $i ( 0 .. $#{$self->{'_string'}} ) {
265 next unless $self->{'_string'}->[$i] || @
{$self->{'_alpha'}->[$i]} > 1;
266 if ( $self->{'_string'}->[$i] == $#{$self->{'_alpha'}->[$i]} ) { # rollover
267 if ( $i == $#{$self->{'_string'}} ) { # end of possibilities
270 $self->{'_string'}->[$i] = 0;
274 $self->{'_string'}->[$i]++;
276 $self->{'_SeqObj'}->seq(join('', map { $j++; $self->{'_alpha'}->[$j]->[$_]; } @
{$self->{'_string'}}));
277 my $desc = $self->{'_SeqObj'}->desc();
278 if ( !defined $desc ) { $desc = ""; }
281 1 while $self->{'_num'} =~ s/(\d)(\d\d\d)(?!\d)/$1,$2/;
282 $desc =~ s/( \[Bio::Tools::IUPAC-generated\sunique sequence # [^\]]*\])|$/ \[Bio::Tools::IUPAC-generated unique sequence # $self->{'_num'}\]/;
283 $self->{'_SeqObj'}->desc($desc);
284 $self->{'_num'} =~ s/,//g;
285 return $self->{'_SeqObj'};
293 Usage : my %aasymbols = $iupac->iupac_iup
294 Function: Returns a hash of PROTEIN symbols -> symbol components
308 Usage : my %dnasymbols = $iupac->iupac_iub
309 Function: Returns a hash of DNA symbols -> symbol components
321 Title : iupac_rev_iub
322 Usage : my %dnasymbols = $iupac->iupac_rev_iub
323 Function: Returns a hash of nucleotide combinations -> IUPAC code
324 (a reverse of the iupac_iub hash).
337 Usage : my $total = $iupac->count();
338 Function: Calculates the number of unique, unambiguous sequences that
339 this ambiguous sequence could generate
349 $count *= scalar(@
$_) for (@
{$self->{'_alpha'}});
357 my $method = $AUTOLOAD;
359 return $self->{'_SeqObj'}->$method(@_)
360 unless $method eq 'DESTROY';