Bio::Tools::CodonTable and Bio::Tools::IUPAC: use our and drop BEGIN blocks.
[bioperl-live.git] / lib / Bio / Tools / OddCodes.pm
blobe77a6b6f3e699575796988ca7bd3e285e30ead8a
1 #$Id$
2 #-----------------------------------------------------------------------------
3 # PACKAGE : OddCodes.pm
4 # PURPOSE : To write amino acid sequences in alternative alphabets
5 # AUTHOR : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl)
6 # SOURCE :
7 # CREATED : 8th July 2000
8 # MODIFIED :
9 # DISCLAIMER : I am employed in the pharmaceutical industry but my
10 # : employers do not endorse or sponsor this module
11 # : in any way whatsoever. The above email address is
12 # : given purely for the purpose of easy communication
13 # : with the author, and does not imply any connection
14 # : between my employers and anything written below.
15 # LICENCE : You may distribute this module under the same terms
16 # : as the rest of BioPerl.
17 #----------------------------------------------------------------------------
19 =head1 NAME
21 Bio::Tools::OddCodes - Object holding alternative alphabet coding for
22 one protein sequence
24 =head1 SYNOPSIS
26 # Takes a sequence object from, e.g., an input stream, and creates an
27 # object for the purposes of rewriting that sequence in another
28 # alphabet. These are abbreviated amino acid sequence alphabets,
29 # designed to simplify the statistical aspects of analysing protein
30 # sequences, by reducing the combinatorial explosion of the
31 # 20-letter alphabet. These abbreviated alphabets range in size
32 # from 2 to 8.
34 # Creating the OddCodes object, eg:
36 my $inputstream = Bio::SeqIO->new( '-file' => "seqfile",
37 '-format' => 'Fasta');
38 my $seqobj = $inputstream->next_seq();
39 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
41 # or:
43 my $seqobj = Bio::PrimarySeq->new
44 (-seq=>'[cut and paste a sequence here]',
45 -alphabet => 'protein',
46 -id => 'test');
47 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
49 # do the alternative coding, returning the answer as a reference to
50 # a string
52 my $output = $oddcode_obj->structural();
53 my $output = $oddcode_obj->chemical();
54 my $output = $oddcode_obj->functional();
55 my $output = $oddcode_obj->charge();
56 my $output = $oddcode_obj->hydrophobic();
57 my $output = $oddcode_obj->Dayhoff();
58 my $output = $oddcode_obj->Sneath();
59 my $output = $oddcode_obj->Stanfel();
62 # display sequence in new form, eg:
64 my $new_coding = $$output;
65 print "\n$new_coding";
67 =head1 DESCRIPTION
69 Bio::Tools::OddCodes is a welterweight object for rewriting a protein
70 sequence in an alternative alphabet. Eight of these are provided, ranging
71 from the 2-letter hydrophobic alphabet, to the 8-letter chemical
72 alphabet. These are useful for the statistical analysis of protein
73 sequences since they can partially avoid the combinatorial explosion
74 produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers
75 etc.)
77 The objects will print out a warning if the input sequence is not a
78 protein. If you know what you are doing, you can silence the warning
79 by setting verbose() to a negative value.
81 See SYNOPSIS above for object creation code.
83 =head1 REFERENCES
85 Stanfel LE (1996) A new approach to clustering the amino acids. J. theor.
86 Biol. 183, 195-205.
88 Karlin S, Ost F and Blaisdell BE (1989) Patterns in DNA and amino acid
89 sequences and their statistical significance. Chapter 6 of: Mathematical
90 Methods for DNA Sequences. Waterman MS (ed.) CRC Press, Boca Raton , FL.
92 =head1 FEEDBACK
94 =head2 Mailing Lists
96 User feedback is an integral part of the evolution of this and other
97 Bioperl modules. Send your comments and suggestions preferably to one
98 of the Bioperl mailing lists. Your participation is much appreciated.
100 bioperl-l@bioperl.org - General discussion
101 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
103 =head2 Support
105 Please direct usage questions or support issues to the mailing list:
107 I<bioperl-l@bioperl.org>
109 rather than to the module maintainer directly. Many experienced and
110 responsive experts will be able to look at the problem and quickly
111 address it. Please include a thorough description of the problem
112 with code and data examples if at all possible.
114 =head2 Reporting Bugs
116 Report bugs to the Bioperl bug tracking system to help us keep track
117 of the bugs and their resolution. Bug reports can be submitted via the
118 web:
120 https://github.com/bioperl/bioperl-live/issues
122 =head1 AUTHOR
124 Derek Gatherer
126 =head1 APPENDIX
128 The rest of the documentation details each of the object methods.
129 Internal methods are usually preceded with a _
131 =cut
133 package Bio::Tools::OddCodes;
135 use strict;
138 use base qw(Bio::Root::Root);
# Constructor.
#
# Accepts the sequence either as a named parameter (-seq => $seqobj) or as
# the first positional argument, and stores it under the '_seqref' key.
#
# Throws (via $self->throw) when no sequence was supplied or when the
# supplied value is not an object implementing Bio::PrimarySeqI.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
    if (!defined $seqobj && @args && ref $args[0]) {
        # parameter not passed as named parameter?
        $seqobj = $args[0];
    }

    # Calling ->isa directly on undef or on an unblessed reference dies with
    # a raw Perl error; check blessedness first so that every bad argument
    # is reported through the same throw() below.
    require Scalar::Util;
    unless (Scalar::Util::blessed($seqobj)
        && $seqobj->isa('Bio::PrimarySeqI'))
    {
        $self->throw("Bio::Tools::OddCodes only works on PrimarySeqI objects");
    }

    $self->{'_seqref'} = $seqobj;

    return $self;
}
161 =head2 structural
163 Title : structural
164 Usage : $output = $oddcode_obj->structural();
165 Function: turns amino acid sequence into 3-letter structural alphabet
166 : A (ambivalent), E (external), I (internal)
167 Example : a sequence ACDEFGH will become AAEEIAE
168 Returns : Reference to the new sequence string
169 Args : none
171 =cut
# Recode the sequence into the 3-letter structural alphabet:
#   ACGPSTWY -> A,  RNDQEHK -> E,  ILMFV -> I
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub structural {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders because the output letters
    # A, E and I also occur in the input sets; digits already present in
    # the sequence are therefore recoded as well.
    $seqstring =~ tr/ACGPSTWY/1/;
    $seqstring =~ tr/RNDQEHK/2/;
    $seqstring =~ tr/ILMFV/3/;
    $seqstring =~ tr/123/AEI/;

    return \$seqstring;
}
192 =head2 functional
194 Title : functional
195 Usage : $output = $oddcode_obj->functional();
196 Function: turns amino acid sequence into 4-letter functional alphabet
197 : A (acidic), C (basic), H (hydrophobic), P (polar)
198 Example : a sequence ACDEFGH will become HPAAHPC
199 Returns : Reference to the new sequence string
200 Args : none
202 =cut
# Recode the sequence into the 4-letter functional alphabet:
#   DE -> A,  HKR -> C,  AFILMPVW -> H,  CGNQSTY -> P
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub functional {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: mapping DE straight to A would
    # let the later AFILMPVW step recode those new A's again. Digits already
    # present in the sequence are therefore recoded as well.
    $seqstring =~ tr/DE/1/;
    $seqstring =~ tr/HKR/2/;
    $seqstring =~ tr/AFILMPVW/3/;
    $seqstring =~ tr/CGNQSTY/4/;
    $seqstring =~ tr/1234/ACHP/;

    return \$seqstring;
}
225 =head2 hydrophobic
227 Title : hydrophobic
228 Usage : $output = $oddcode_obj->hydrophobic();
229 Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
230 : I (hydrophobic), O (hydrophilic)
231 Example : a sequence ACDEFGH will become IOOOIOO
232 Returns : Reference to the new sequence string
233 Args : none
235 =cut
# Recode the sequence into the 2-letter hydrophobicity alphabet:
#   AFILMPVW -> I,  CDEGHKNQRSTY -> O
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub hydrophobic {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders because the output letter I
    # also occurs in the first input set; digits already present in the
    # sequence are therefore recoded as well.
    $seqstring =~ tr/AFILMPVW/1/;
    $seqstring =~ tr/CDEGHKNQRSTY/2/;
    $seqstring =~ tr/12/IO/;

    return \$seqstring;
}
254 =head2 Dayhoff
256 Title : Dayhoff
257 Usage : $output = $oddcode_obj->Dayhoff();
258 Function: turns amino acid sequence into 6-letter Dayhoff alphabet
259 Example : a sequence ACDEFGH will become CADDGCE
260 : A (=C), C (=AGPST), D (=DENQ),
261 : E (=HKR), F (=ILMV), G (=FWY)
262 Returns : Reference to the new sequence string
263 Args : none
265 =cut
# Recode the sequence into the 6-letter Dayhoff alphabet:
#   C -> A,  AGPST -> C,  DENQ -> D,  HKR -> E,  ILMV -> F,  FWY -> G
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub Dayhoff {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: mapping C straight to A would
    # let the later AGPST step recode those new A's again. Digits already
    # present in the sequence are therefore recoded as well.
    $seqstring =~ tr/C/1/;
    $seqstring =~ tr/AGPST/2/;
    $seqstring =~ tr/DENQ/3/;
    $seqstring =~ tr/HKR/4/;
    $seqstring =~ tr/ILMV/5/;
    $seqstring =~ tr/FWY/6/;
    $seqstring =~ tr/123456/ACDEFG/;

    return \$seqstring;
}
292 =head2 Sneath
294 Title : Sneath
295 Usage : $output = $oddcode_obj->Sneath();
296 Function: turns amino acid sequence into 7-letter Sneath alphabet
297 Example : a sequence ACDEFGH will become CEFFHCH
298 : A (=ILV), C (=AGP), D (=MNQ), E (=CST),
299 : F (=DE), G (=KR), H (=FHWY)
300 Returns : Reference to the new sequence string
301 Args : none
303 =cut
# Recode the sequence into the 7-letter Sneath alphabet:
#   ILV -> A,  AGP -> C,  MNQ -> D,  CST -> E,  DE -> F,  KR -> G,  FHWY -> H
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub Sneath {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: mapping ILV straight to A
    # would let the later AGP step recode those new A's again. Digits
    # already present in the sequence are therefore recoded as well.
    $seqstring =~ tr/ILV/1/;
    $seqstring =~ tr/AGP/2/;
    $seqstring =~ tr/MNQ/3/;
    $seqstring =~ tr/CST/4/;
    $seqstring =~ tr/DE/5/;
    $seqstring =~ tr/KR/6/;
    $seqstring =~ tr/FHWY/7/;
    $seqstring =~ tr/1234567/ACDEFGH/;

    return \$seqstring;
}
332 =head2 Stanfel
334 Title : Stanfel
335 Usage : $output = $oddcode_obj->Stanfel();
336 Function: turns amino acid sequence into 4-letter Stanfel alphabet
337 Example : a sequence ACDEFGH will become AACCDAE
338 : A (=ACGILMPSTV), C (=DENQ), D (=FWY), E (=HKR)
339 Returns : Reference to the new sequence string
340 Args : none
342 =cut
# Recode the sequence into the 4-letter Stanfel alphabet:
#   ACGILMPSTV -> A,  DENQ -> C,  FWY -> D,  HKR -> E
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub Stanfel {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: the output letters C, D and E
    # also occur in the input sets (e.g. C produced from DENQ would collide
    # with an input C). Digits already present in the sequence are therefore
    # recoded as well.
    $seqstring =~ tr/ACGILMPSTV/1/;
    $seqstring =~ tr/DENQ/2/;
    $seqstring =~ tr/FWY/3/;
    $seqstring =~ tr/HKR/4/;
    $seqstring =~ tr/1234/ACDE/;

    return \$seqstring;
}
365 =head2 chemical
367 Title : chemical
368 Usage : $output = $oddcode_obj->chemical();
369 Function: turns amino acid sequence into 8-letter chemical alphabet
370 : A (acidic), L (aliphatic), M (amide), R (aromatic)
371 : C (basic), H (hydroxyl), I (imino), S (sulphur)
372 Example : a sequence ACDEFGH will become LSAARLC
373 Returns : Reference to the new sequence string
374 Args : none
376 =cut
# Recode the sequence into the 8-letter chemical alphabet:
#   DE -> A,  AGILV -> L,  NQ -> M,  FWY -> R,
#   RHK -> C,  ST -> H,  P -> I,  CM -> S
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub chemical {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: mapping DE straight to A would
    # let the later AGILV step recode those new A's again. Digits already
    # present in the sequence are therefore recoded as well.
    $seqstring =~ tr/DE/1/;
    $seqstring =~ tr/AGILV/2/;
    $seqstring =~ tr/NQ/3/;
    $seqstring =~ tr/FWY/4/;
    $seqstring =~ tr/RHK/5/;
    $seqstring =~ tr/ST/6/;
    $seqstring =~ tr/P/7/;
    $seqstring =~ tr/CM/8/;
    $seqstring =~ tr/12345678/ALMRCHIS/;

    return \$seqstring;
}
407 =head2 charge
409 Title : charge
410 Usage : $output = $oddcode_obj->charge();
411 Function: turns amino acid sequence into 3-letter charge alphabet
412 Example : a sequence ACDEFGH will become NNAANNC
413 : A (negative; NOT anode), C (positive; NOT cathode), N (neutral)
414 Returns : Reference to the new sequence string
415 Args : none
417 =cut
# Recode the sequence into the 3-letter charge alphabet:
#   DE -> A,  HKR -> C,  ACFGILMNPQSTVWY -> N
# Characters outside these sets are left untouched.
# Returns a reference to the recoded string.
sub charge {
    my ($self) = @_;
    my $seqstring = $self->_pullseq();    # upper-cased sequence string

    # tr/// takes plain character lists, not regex classes, so the lists
    # must not be wrapped in [ ] (that would recode literal brackets too).
    # Digits serve as temporary placeholders: mapping DE and HKR straight to
    # A and C would let the later neutral step recode those letters to N.
    # Digits already present in the sequence are therefore recoded as well.
    $seqstring =~ tr/DE/1/;
    $seqstring =~ tr/HKR/2/;
    $seqstring =~ tr/ACFGILMNPQSTVWY/3/;
    $seqstring =~ tr/123/ACN/;

    return \$seqstring;
}
# _pullseq - internal helper shared by every recoding method.
#
# Fetches the sequence object stored under '_seqref', verifies that it is a
# Bio::PrimarySeqI, warns when its alphabet is not 'protein' (unless
# verbose() is negative), and returns the sequence string in upper case.
# Throws when the stored object has the wrong type or the sequence is empty.
sub _pullseq {
    my ($self) = @_;

    my $stored = $self->{'_seqref'};

    if (!$stored->isa("Bio::PrimarySeqI")) {
        $self->throw("die, OddCodes works only on PrimarySeqI objects\n");
    }

    # Warn only when the alphabet is wrong AND the caller has not silenced
    # warnings with a negative verbosity.
    if ($stored->alphabet ne 'protein' && !($self->verbose < 0)) {
        $self->warn("\tAll OddCode alphabets need a protein sequence,\n".
                    "\tbut BioPerl thinks this is not: [". $stored->id. "]");
    }

    my $seqstring = uc $stored->seq();

    $self->throw("$seqstring: die, sequence has zero length\n")
        if length($seqstring) < 1;

    return $seqstring;
}