t/SeqTools/SeqStats.t

   1 # -*-Perl-*- Test Harness script for Bioperl
   2 # $Id: SeqStats.t 11525 2007-06-27 10:16:38Z sendu $
   3
   4 use strict;
   5
   6 BEGIN {
   7     use lib '.';
   8     use Bio::Root::Test;
   9
  10     test_begin(-tests => 47);
  11
  12         use_ok('Bio::SeqIO');
  13         use_ok('Bio::Tools::SeqStats');
  14 }
  15
  16 my ($seqobj, $count, $seqobj_stats, $wt);
  17
  18 my $str = Bio::SeqIO->new(-file=> test_input_file('multifa.seq'), '-format' => 'Fasta');
  19 $seqobj = $str->next_seq();
  20 ok defined $seqobj, 'new Bio::Root::IO object';
  21
  22 my $seq_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
  23 ok defined $seq_stats && $seq_stats, 'new Bio::Tools:SeqStats object';
  24
  25 # eg for DNA sequence
  26 my $hash_ref = $seq_stats->count_monomers();
  27 is ( $hash_ref->{'A'}, 80 , 'count_monomers()');
  28
  29 $hash_ref = $seq_stats-> count_codons();
  30 ok defined $hash_ref && $hash_ref , 'count_codons()';
  31
  32 my $weight = $seq_stats->get_mol_wt();
  33 ok defined $weight && $weight , 'get_mol_wt()' ;
  34
  35 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
  36                                -alphabet=>'dna', -id=>'test');
  37 ok $seqobj_stats = Bio::Tools::SeqStats->new(-seq=>$seqobj);
  38 isa_ok $seqobj_stats, 'Bio::Tools::SeqStats';
  39
  40 $count = $seqobj_stats->count_monomers();  # for DNA sequence
  41 is $count->{'A'}, 3;
  42 is $count->{'C'}, 4;
  43 is $count->{'G'}, 5;
  44 is $count->{'T'}, 4;
  45
  46 $count = $seqobj_stats->count_codons();
  47 is $count->{'ACT'}, 2;
  48 is $count->{'GTG'}, 1;
  49 is $count->{'GCG'}, 1;
  50 is $count->{'TCA'}, 1;
  51
  52 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTACTTCA', -alphabet=>'dna',
  53                                -id=>'test');
  54 $seqobj_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
  55 $wt = $seqobj_stats->get_mol_wt();  # for DNA sequence
  56 is &round($$wt[0]), 2738 ;
  57
  58 $seqobj = Bio::PrimarySeq->new(-seq=>'ACXACNNCA',
  59                                -alphabet=>'dna', -id=>'test');
  60 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
  61 is &round($$wt[0]), 2693;
  62 is &round($$wt[1]), 2813;
  63
  64 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
  65                                -alphabet=>'dna', -id=>'test');
  66 $count = Bio::Tools::SeqStats->count_monomers($seqobj);  # for DNA sequence
  67 is $count->{'A'}, 3;
  68 is $count->{'C'}, 4;
  69 is $count->{'G'}, 5;
  70 is $count->{'T'}, 4;
  71
  72 # protein
  73
  74 $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
  75                                -alphabet=>'protein', -id=>'test');
  76 $seqobj_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
  77 $count = $seqobj_stats->count_monomers();  # for amino sequence
  78 is $$count{'M'}, 1;
  79 is $$count{'I'}, 3;
  80 is $$count{'Y'}, 2;
  81 is $$count{'T'}, 3;
  82 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
  83 is int $$wt[0], 2896;
  84 is int $$wt[1], 2896;
  85
  86 # Issue 3185: https://redmine.open-bio.org/issues/3185
  87
  88 $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT*',
  89                                -alphabet=>'protein', -id=>'test');
  90 is($seqobj->seq, 'MQSERGITIDISLWKFETSKYYVT*');
  91 $seqobj_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
  92 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
  93 is int $$wt[0], 2896;
  94 is int $$wt[1], 2896;
  95
  96 # selenocysteine
  97 ok $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
  98                                   -alphabet=>'protein');
  99 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
 100 is &round($$wt[0]), 2896 ;
 101
 102 # RNA
 103
 104 $seqobj = Bio::PrimarySeq->new(-seq=>'UYXUYNNYU', -alphabet=>'rna');
 105 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
 106 is &round($$wt[0]), 2768;
 107 is &round($$wt[1]), 2891;
 108
 109 ok $seqobj = Bio::PrimarySeq->new(-seq=>'TGCCGTGTGTGCTGCTGCT', -alphabet=>'rna');
 110 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
 111 is &round($$wt[0]), 6104 ;
 112
 113 #
 114 # hydropathicity aka "gravy" score
 115 #
 116
 117 # normal seq (should succeed)
 118 ok $seqobj = Bio::PrimarySeq->new(-seq=>'MSFVLVAPDMLATAAADVVQIGSAVSAGS',
 119                                   -alphabet=>'protein');
 120 my $gravy = Bio::Tools::SeqStats->hydropathicity($seqobj);
 121 is int($gravy*1000), 1224;  # check to nearest 0.1%
 122
 123 # ambiguous sequence (should fail)
 124 ok $seqobj = Bio::PrimarySeq->new(-seq=>'XXXB**BS', -alphabet=>'protein');
 125 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
 126 like $@, qr/ambiguous amino acids/i;
 127
 128 # empty sequence (should fail)
 129 ok $seqobj = Bio::PrimarySeq->new(-seq=>'', -alphabet=>'protein');
 130 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
 131 like $@, qr/hydropathicity not defined/i;
 132
 133 # DNA sequence (should fail)
 134 ok $seqobj = Bio::PrimarySeq->new(-seq=>'GATTACA', -alphabet=>'dna');
 135 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
 136 like $@, qr/only meaningful for protein/;
 137
 138
 139 #
 140 # Extra functions
 141 #
 142
 143 # perl does not have an explicit rounding function
 144 sub round { return int ((shift @_) + 0.5 ) }
 145