1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin( -tests => 206 );
12 use_ok('Bio::SimpleAlign');
13 use_ok('Bio::AlignIO');
14 use_ok('Bio::SeqFeature::Generic');
15 use_ok('Bio::Location::Simple');
16 use_ok('Bio::Location::Split');
19 my $DEBUG = test_debug();
21 my ( $str, $aln, @seqs, $seq );
23 $str = Bio::AlignIO->new( -file => test_input_file('testaln.pfam') );
24 isa_ok( $str, 'Bio::AlignIO' );
25 $aln = $str->next_aln();
26 is $aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246', "pfam input test";
28 my $aln1 = $aln->remove_columns( ['mismatch'] );
31 '::*::::*:**:*:*:***:**.***::*.*::**::**:***..**:'
32 . '*:*.::::*:.:*.*.**:***.**:*.:.**::**.*:***********:::*:.:*:**.*::*:'
33 . '.*.:*:**:****************::',
37 my $aln2 = $aln->select( 1, 3 );
38 isa_ok( $aln2, 'Bio::Align::AlignI' );
39 is( $aln2->num_sequences, 3, 'num_sequences' );
41 # test select non contiguous-sorted by default
42 $aln2 = $aln->select_noncont( 6, 7, 8, 9, 10, 1, 2, 3, 4, 5 );
43 is( $aln2->num_sequences, 10, 'num_sequences' );
45 $aln2->get_seq_by_pos(2)->id,
46 $aln->get_seq_by_pos(2)->id,
50 $aln2->get_seq_by_pos(8)->id,
51 $aln->get_seq_by_pos(8)->id,
55 # test select non contiguous-nosort option
56 $aln2 = $aln->select_noncont( 'nosort', 6, 7, 8, 9, 10, 1, 2, 3, 4, 5 );
57 is( $aln2->num_sequences, 10, 'num_sequences' );
59 $aln2->get_seq_by_pos(2)->id,
60 $aln->get_seq_by_pos(7)->id,
64 $aln2->get_seq_by_pos(8)->id,
65 $aln->get_seq_by_pos(3)->id,
69 # test select non contiguous by name
70 my $aln3 = $aln->select_noncont_by_name('1433_LYCES','BMH1_YEAST','143T_HUMAN');
71 is( $aln3->num_sequences, 3, 'select_noncont_by_name' );
72 my @seqs3 = $aln3->each_seq();
73 is $seqs3[0]->id, '1433_LYCES', 'select_noncont_by_name';
74 is $seqs3[1]->id, 'BMH1_YEAST', 'select_noncont_by_name';
75 is $seqs3[2]->id, '143T_HUMAN', 'select_noncont_by_name';
78 @seqs = $aln->each_seq();
79 is scalar @seqs, 16, 'each_seq';
80 is $seqs[0]->get_nse, '1433_LYCES/9-246', 'get_nse';
81 is $seqs[0]->id, '1433_LYCES', 'id';
82 is $seqs[0]->num_gaps, 3, 'num_gaps';
83 @seqs = $aln->each_alphabetically();
84 is scalar @seqs, 16, 'each_alphabetically';
86 is $aln->column_from_residue_number( '1433_LYCES', 10 ), 2,
87 'column_from_residue_number';
88 is $aln->displayname( '1433_LYCES/9-246', 'my_seq' ), 'my_seq',
89 'display_name get/set';
90 is $aln->displayname('1433_LYCES/9-246'), 'my_seq', 'display_name get';
91 is substr( $aln->consensus_string(50), 0, 60 ),
92 "RE??VY?AKLAEQAERYEEMV??MK?VAE??????ELSVEERNLLSVAYKNVIGARRASW",
94 is substr( $aln->consensus_string(100), 0, 60 ),
95 "?????????L????E????M???M????????????L??E?RNL?SV?YKN??G??R??W",
97 is substr( $aln->consensus_string(0), 0, 60 ),
98 "REDLVYLAKLAEQAERYEEMVEFMKKVAELGAPAEELSVEERNLLSVAYKNVIGARRASW",
101 ok( @seqs = $aln->each_seq_with_id('143T_HUMAN') );
102 is scalar @seqs, 1, 'each_seq_with_id';
104 is $aln->is_flush, 1, 'is_flush';
105 ok( $aln->id('x') && $aln->id eq 'x', 'id get/set' );
107 is $aln->length, 242, 'length';
108 is $aln->num_residues, 3769, 'num_residues';
109 is $aln->num_sequences, 16, 'num_sequences';
110 is( sprintf( "%.2f", $aln->overall_percentage_identity() ),
111 33.06, 'overall_percentage_identity' );
112 is( sprintf( "%.2f", $aln->overall_percentage_identity('align') ),
113 33.06, 'overall_percentage_identity (align)' );
114 is( sprintf( "%.2f", $aln->overall_percentage_identity('short') ),
115 35.24, 'overall_percentage_identity (short)' );
116 is( sprintf( "%.2f", $aln->overall_percentage_identity('long') ),
117 33.47, 'overall_percentage_identity (long)' );
118 is( sprintf( "%.2f", $aln->average_percentage_identity() ),
119 66.91, 'average_percentage_identity' );
121 ok $aln->set_displayname_count;
122 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES_1',
123 'set_displayname_count';
124 ok $aln->set_displayname_flat;
125 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES', 'set_displayname_flat';
126 ok $aln->set_displayname_normal;
127 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES/9-246',
128 'set_displayname_normal';
130 ok $aln->map_chars( '\.', '-' );
131 @seqs = $aln->each_seq_with_id('143T_HUMAN');
132 is substr( $seqs[0]->seq, 0, 60 ),
133 'KTELIQKAKLAEQAERYDDMATCMKAVTEQGA---ELSNEERNLLSVAYKNVVGGRRSAW',
134 'uppercase, map_chars';
138 ' ::*::::* : * *: *: *:***:**.***::*.'
139 . ' *::**::**:*** . . ** :* :* . :: :: *: . :* .*. **:'
140 . '***.** :*. : .* * : : **.*:***********:::* : .: * :** .'
141 . '*::*: .*. : *: **:****************:: ',
144 ok $aln->remove_seq( $seqs[0] ), 'remove_seqs';
145 is $aln->num_sequences, 15, 'remove_seqs';
146 ok $aln->add_seq( $seqs[0] ), 'add_seq';
147 is $aln->num_sequences, 16, 'add_seq';
148 ok $seq = $aln->get_seq_by_pos(1), 'get_seq_by_pos';
149 is( $seq->id, '1433_LYCES', 'get_seq_by_pos' );
150 ok( ( $aln->missing_char(), 'P' ) and ( $aln->missing_char('X'), 'X' ) );
151 ok( ( $aln->match_char(), '.' ) and ( $aln->match_char('-'), '-' ) );
152 ok( ( $aln->gap_char(), '-' ) and ( $aln->gap_char('.'), '.' ) );
154 is $aln->purge(0.7), 12, 'purge';
155 is $aln->num_sequences, 4, 'purge';
158 test_skip( -tests => 24, -requires_module => 'IO::String' );
161 my $out = IO::String->new($string);
163 my $s1 = Bio::LocatableSeq->new(
165 -seq => 'aawtat-tn-',
170 my $s2 = Bio::LocatableSeq->new(
172 -seq => '-aaaat-tt-',
177 $a = Bio::SimpleAlign->new();
181 is( $a->consensus_iupac, "aAWWAT-TN-", 'IO::String consensus_iupac' );
182 $s1->seq('aaaaattttt');
183 $s1->alphabet('dna');
185 $s2->seq('-aaaatttt-');
187 $a = Bio::SimpleAlign->new();
191 my $strout = Bio::AlignIO->new( -fh => $out, -format => 'pfam' );
192 $strout->write_aln($a);
195 "AAA/1-10 aaaaattttt\n" . "BBB/1-8 -aaaatttt-\n",
196 'IO::String write_aln normal'
201 my $b = $a->slice( 2, 9 );
202 $strout->write_aln($b);
204 "AAA/2-9 aaaatttt\n" . "BBB/1-8 aaaatttt\n",
205 'IO::String write_aln slice';
209 $b = $a->slice( 9, 10 );
210 $strout->write_aln($b);
212 "AAA/9-10 tt\n" . "BBB/8-8 t-\n",
213 'IO::String write_aln slice';
218 $b = $a->slice( 1, 2 );
219 $strout->write_aln($b);
221 "AAA/1-2 aa\n" . "BBB/1-1 -a\n",
222 'IO::String write_aln slice';
224 # not sure what coordinates this should return...
228 $b = $a->slice( 1, 1, 1 );
229 $strout->write_aln($b);
231 "AAA/1-1 a\n" . "BBB/1-0 -\n",
232 'IO::String write_aln slice';
237 $b = $a->slice( 2, 2 );
238 $strout->write_aln($b);
240 "AAA/2-2 a\n" . "BBB/1-1 a\n",
241 'IO::String write_aln slice';
243 eval { $b = $a->slice( 11, 13 ); };
247 # remove_columns by position
250 $str = Bio::AlignIO->new( -file => test_input_file('mini-align.aln') );
251 $aln1 = $str->next_aln;
252 $aln2 = $aln1->remove_columns( [ 0, 0 ] );
253 $strout->write_aln($aln2);
255 "P84139/2-33 NEGEHQIKLDELFEKLLRARLIFKNKDVLRRC\n"
256 . "P814153/2-33 NEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC\n"
257 . "BAB68554/1-14 ------------------AMLIFKDKQLLQQC\n"
258 . "gb|443893|124775/1-32 MRFRFQIKVPPAVEGARPALLIFKSRPELGGC\n",
259 'remove_columns by position';
261 # and when arguments are entered in "wrong order"?
264 my $aln3 = $aln1->remove_columns( [ 1, 1 ], [ 30, 30 ], [ 5, 6 ] );
265 $strout->write_aln($aln3);
267 "P84139/1-33 MEGEIKLDELFEKLLRARLIFKNKDVLRC\n"
268 . "P814153/1-33 MEGMIKLDVLFEKLLRARLIFKNKDVLRC\n"
269 . "BAB68554/1-14 ----------------AMLIFKDKQLLQC\n"
270 . "gb|443893|124775/2-32 -RFRIKVPPAVEGARPALLIFKSRPELGC\n",
271 'remove_columns by position (wrong order)';
273 my %cigars = $aln1->cigar_line;
274 is $cigars{'gb|443893|124775/1-32'}, '19,19:21,24:29,29:32,32',
276 is $cigars{'P814153/1-33'}, '20,20:22,25:30,30:33,33', 'cigar_line';
277 is $cigars{'BAB68554/1-14'}, '1,1:3,6:11,11:14,14', 'cigar_line';
278 is $cigars{'P84139/1-33'}, '20,20:22,25:30,30:33,33', 'cigar_line';
280 # sort_alphabetically
281 my $s3 = Bio::LocatableSeq->new(
283 -seq => '-attat-tt-',
290 is $a->get_seq_by_pos(2)->id, "BBB", 'sort_alphabetically - before';
291 ok $a->sort_alphabetically;
292 is $a->get_seq_by_pos(2)->id, "ABB", 'sort_alphabetically - after';
294 $b = $a->remove_gaps();
295 is $b->consensus_string, "aaaattt", 'remove_gaps';
297 $s1->seq('aaaaattt--');
299 $b = $a->remove_gaps( undef, 'all_gaps_only' );
300 is $b->consensus_string, "aaaaatttt", 'remove_gaps all_gaps_only';
302 # test set_new_reference:
303 $str = Bio::AlignIO->new( -file => test_input_file('testaln.clustalw') );
304 $aln = $str->next_aln();
305 my $new_aln = $aln->set_new_reference(3);
306 $a = $new_aln->get_seq_by_pos(1)->display_id;
307 $new_aln = $aln->set_new_reference('P851414');
308 $b = $new_aln->get_seq_by_pos(1)->display_id;
309 is $a, 'P851414', 'set_new_reference';
310 is $b, 'P851414', 'set_new_reference';
313 $str = Bio::AlignIO->new(
315 -file => test_input_file('testaln2.fasta')
317 $aln = $str->next_aln();
318 $new_aln = $aln->uniq_seq();
319 $a = $new_aln->num_sequences;
320 is $a, 11, 'uniq_seq';
322 # check if slice works well with a LocateableSeq in its negative strand
323 my $seq1 = Bio::LocatableSeq->new(
324 -SEQ => "ATGCTG-ATG",
331 my $seq2 = Bio::LocatableSeq->new(
332 -SEQ => "A-GCTGCATG",
340 my $aln_negative = Bio::SimpleAlign->new();
341 $aln_negative->add_seq($seq1);
342 $aln_negative->add_seq($seq2);
344 $aln_negative->column_from_residue_number(
345 $aln_negative->get_seq_by_pos(1)->display_id, 2 );
347 $aln_negative->column_from_residue_number(
348 $aln_negative->get_seq_by_pos(1)->display_id, 5 );
349 $aln_negative = $aln_negative->slice( $end_column, $start_column );
350 my $seq_negative = $aln_negative->get_seq_by_pos(1);
351 is( $seq_negative->start, 2, "bug 2099" );
352 is( $seq_negative->end, 5, "bug 2099" );
355 my $s11 = Bio::LocatableSeq->new( -id => 'testseq1', -seq => 'AAA' );
356 my $s21 = Bio::LocatableSeq->new( -id => 'testseq2', -seq => 'CCC' );
357 $a = Bio::SimpleAlign->new();
358 ok( $a->add_seq( $s11, 1 ), "bug 2793" );
359 is( $a->get_seq_by_pos(1)->seq, 'AAA', "bug 2793" );
360 ok( $a->add_seq( $s21, 2 ), "bug 2793" );
361 is( $a->get_seq_by_pos(2)->seq, 'CCC', "bug 2793" );
362 throws_ok { $a->add_seq( $s21, 0 ) } qr/must be >= 1/, 'Bad sequence, bad!';
365 # test for Bio::SimpleAlign annotation method and
366 # Bio::FeatureHolder stuff
368 $aln = Bio::SimpleAlign->new;
369 isa_ok($aln,"Bio::AnnotatableI");
371 for my $seqset ( [qw(one AGAGGAT)], [qw(two AGACGAT)], [qw(three AGAGGTT)] ) {
373 Bio::LocatableSeq->new(
380 is $aln->num_sequences, 3, 'added 3 seqs';
382 $aln->add_SeqFeature(
383 Bio::SeqFeature::Generic->new(
386 -primary_tag => 'charLabel',
389 $aln->add_SeqFeature(
390 Bio::SeqFeature::Generic->new(
393 -primary_tag => 'charLabel',
397 is( $aln->feature_count, 2, 'first 2 features added' );
399 my $splitloc = Bio::Location::Split->new;
400 $splitloc->add_sub_Location(
401 Bio::Location::Simple->new(
407 $splitloc->add_sub_Location(
408 Bio::Location::Simple->new(
414 $aln->add_SeqFeature(
415 Bio::SeqFeature::Generic->new(
416 -location => $splitloc,
417 -primary_tag => 'charLabel',
421 is( $aln->feature_count, 3, '3rd feature added' );
423 #do slices and masks as defined by the feature
425 my @slice_lens = qw(1 1 2 2);
426 for my $feature ( $aln->get_SeqFeatures ) {
427 for my $loc ( $feature->location->each_Location ) {
428 my $masked = $aln->mask_columns( $loc->start, $loc->end, '?');
430 lives_ok {my $fslice = $masked->slice( $loc->start, $loc->end )};
432 $masked->verbose(-1);
433 my $fslice = $masked->slice( $loc->start, $loc->end );
434 is( $fslice->length, $slice_lens[ $i++ ], "slice $i len" );
435 for my $s ( $fslice->each_seq ) {
436 like( $s->seq, qr/^\?+$/, 'correct masked seq' );
441 # test set_displayname_safe & restore_displayname:
442 $str = Bio::AlignIO->new( -file => test_input_file('pep-266.aln') );
443 $aln = $str->next_aln();
444 is $aln->get_seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
445 'initial display id ok';
446 my ( $new_aln, $ref ) = $aln->set_displayname_safe();
447 is $new_aln->get_seq_by_pos(3)->display_id, 'S000000003', 'safe display id ok';
448 my $restored_aln = $new_aln->restore_displayname($ref);
449 is $restored_aln->get_seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
450 'restored display id ok';
453 $str = Bio::AlignIO->new( -file => test_input_file('testaln.clustalw') );
454 my $list_file = test_input_file('testaln.list');
455 $aln = $str->next_aln();
456 $new_aln = $aln->sort_by_list($list_file);
457 $a = $new_aln->get_seq_by_pos(1)->display_id;
458 is $a, 'BAB68554', 'sort by list ok';
460 # test for Binary/Morphological/Mixed data
466 my $s1 = Bio::LocatableSeq->new(
468 -seq => 'aawtat-tn-',
473 my $s2 = Bio::LocatableSeq->new(
475 -seq => '-aaaat-tt-',
480 my $s3 = Bio::LocatableSeq->new(
482 -seq => '-aaaat-tt-',
487 $a = Bio::SimpleAlign->new();
492 @seqs = $a->each_seq;
493 is( $seqs[0]->start, 12 );
494 is( $seqs[1]->start, 1 );
495 is( $seqs[2]->start, 31 );
498 @seqs = $a->each_seq;
500 is( $seqs[0]->start, 1 );
501 is( $seqs[1]->start, 12 );
502 is( $seqs[2]->start, 31 );
505 'allele1' => 'GGATCCATT[C/C]CTACT',
506 'allele2' => 'GGAT[C/-][C/-]ATT[C/C]CT[A/C]CT',
507 'allele3' => 'G[G/C]ATCCATT[C/G]CTACT',
508 'allele4' => 'GGATCCATT[C/G]CTACT',
509 'allele5' => 'GGATCCATT[C/G]CTAC[T/A]',
510 'allele6' => 'GGATCCATT[C/G]CTA[C/G][T/A]',
511 'testseq' => 'GGATCCATT[C/G]CTACT'
514 my $alnin = Bio::AlignIO->new(
516 -file => test_input_file('alleles.fas')
519 $aln = $alnin->next_aln;
523 # compare all to test seq
525 for my $ls ( sort keys %testdata ) {
527 my $str = $aln->bracket_string(
528 -refseq => 'testseq',
529 -allele1 => 'allele1',
532 is( $str, $testdata{$ls}, "BIC:$str" );
536 'allele1' => 'GGATCCATT{C.C}CTACT',
537 'allele2' => 'GGAT{C.-}{C.-}ATT{C.C}CT{A.C}CT',
538 'allele3' => 'G{G.C}ATCCATT{C.G}CTACT',
539 'allele4' => 'GGATCCATT{C.G}CTACT',
540 'allele5' => 'GGATCCATT{C.G}CTAC{T.A}',
541 'allele6' => 'GGATCCATT{C.G}CTA{C.G}{T.A}',
542 'testseq' => 'GGATCCATT{C.G}CTACT'
545 for my $ls ( sort keys %testdata ) {
547 my $str = $aln->bracket_string(
548 -refseq => 'testseq',
549 -allele1 => 'allele1',
554 is( $str, $testdata{$ls}, "BIC:$str" );
557 # is _remove_col really working correctly?
558 my $a = Bio::LocatableSeq->new(
561 -seq => 'atcgatcgatcgatcg',
565 my $b = Bio::LocatableSeq->new(
568 -seq => '-tcgatc-atcgatcg',
572 my $c = Bio::LocatableSeq->new(
575 -seq => 'atcgatcgatc-atc-',
579 my $d = Bio::LocatableSeq->new(
582 -seq => '--cgatcgatcgat--',
586 my $e = Bio::LocatableSeq->new(
589 -seq => '-t-gatcgatcga-c-',
593 $aln = Bio::SimpleAlign->new();
598 my $gapless = $aln->remove_gaps();
599 foreach my $seq ( $gapless->each_seq ) {
600 if ( $seq->id eq 'a' ) {
603 is $seq->seq, 'tcgatcatcatc';
605 elsif ( $seq->id eq 'b' ) {
608 is $seq->seq, 'tcgatcatcatc';
610 elsif ( $seq->id eq 'c' ) {
613 is $seq->seq, 'tcgatcatcatc';
619 $gapless = $aln->remove_gaps();
620 foreach my $seq ( $gapless->each_seq ) {
621 if ( $seq->id eq 'a' ) {
624 is $seq->seq, 'gatcatca';
626 elsif ( $seq->id eq 'b' ) {
629 is $seq->seq, 'gatcatca';
631 elsif ( $seq->id eq 'c' ) {
634 is $seq->seq, 'gatcatca';
636 elsif ( $seq->id eq 'd' ) {
639 is $seq->seq, 'gatcatca';
641 elsif ( $seq->id eq 'e' ) {
644 is $seq->seq, 'gatcatca';
650 my $slice = $aln->slice(1,4);
652 for my $seq ($slice->each_seq) {
653 if ( $seq->id eq 'a' ) {
657 is $seq->seq, 'atcg';
659 elsif ( $seq->id eq 'b' ) {
663 is $seq->seq, '-tcg';
665 elsif ( $seq->id eq 'c' ) {
669 is $seq->seq, 'atcg';
671 elsif ( $seq->id eq 'd' ) {
675 is $seq->seq, '--cg';
677 elsif ( $seq->id eq 'e' ) {
681 is $seq->seq, '-t-g';
685 my $f = Bio::LocatableSeq->new(
687 -seq => 'a-cgatcgatcgat-g',
691 $aln = Bio::SimpleAlign->new();
695 $gapless = $aln->remove_gaps();
696 foreach my $seq ( $gapless->each_seq ) {
697 if ( $seq->id eq 'a' ) {
700 is $seq->seq, 'acgatcgatcgatg';
702 elsif ( $seq->id eq 'f' ) {
705 is $seq->seq, 'acgatcgatcgatg';
710 Bio::LocatableSeq->new( -id => 'g', -seq => 'atgc', -start => 5, -end => 8 );
711 my $h = Bio::LocatableSeq->new(
717 $aln = Bio::SimpleAlign->new();
721 # test for new method in API get_seq_by_id
722 my $retrieved = $aln->get_seq_by_id('g');
723 is( defined $retrieved, 1 );
724 my $removed = $aln->remove_columns( [ 1, 3 ] );
725 foreach my $seq ( $removed->each_seq ) {
726 if ( $seq->id eq 'g' ) {
731 elsif ( $seq->id eq 'h' ) {
738 # work out mask_columns(), see bug 2842
740 test_skip(-tests => 6, -requires_module => 'IO::String');
741 my $io = Bio::AlignIO->new( -file => test_input_file("testaln.clustalw") );
742 my $aln = $io->next_aln();
743 isa_ok( $aln, 'Bio::SimpleAlign' );
744 my $consensus = <<EOU;
745 MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSWEPKDLPHRHEQIEALAQILV
746 PVLRGETMKIIFCGHHACELGEDRGTKGFVIDELKDVDEDRNGKVDVIEINCEH
747 MDTHYRVLPNIAKLFDDCTGIGVPMHGGPTDEVTAKLKQVIDMKERFVIIVLDE
748 IDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEE
749 EVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDL
750 LRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLL
751 DENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSK
752 GRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
754 $consensus =~ s/\n//g;
756 is( $aln->consensus_string, $consensus, 'consensus string looks ok' );
759 my @cons_got = $aln->consensus_conservation;
760 # 422 positions, mostly two of six sequences conserved, set as default
761 my @cons_expect = (100 * 2/6) x 422;
762 # Exceptionally columns as a mask, manually determined (1-based columns)
763 $cons_expect[$_-1] = 100 * 1/6 for (5,12,41,70,82,310,390);
764 $cons_expect[$_-1] = 100 * 3/6 for (27,30,32,36,47,49,61,66,69,71,77,79,
765 81,91,96,97,105,114,115,117,118,121,122,129,140,146,156,159,160,162,
766 183,197,217,221,229,242,247,248,261,266,282,287,295,316,323,329,335,337,344,);
767 $cons_expect[$_-1] = 100 * 4/6 for (84,93,99,100,102,107,108,112,113,119,150,);
768 $cons_expect[$_-1] = 100 * 5/6 for (81,110);
769 # Format for string comparison
770 @cons_expect = map { sprintf "%4.1f", $_ } @cons_expect;
771 @cons_got = map { sprintf "%4.1f", $_ } @cons_got;
772 is(length($aln->consensus_string), scalar(@cons_got),"conservation length");
773 is_deeply(\@cons_got, \@cons_expect, "conservation scores");
776 is( aln2str( $aln => 'pfam' ), <<EOA, 'looks like correct unmasked alignment (from clustalw)' );
777 P84139/1-420 MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
778 P814153/1-420 MNEGMHQIKLDVLFEKLLRARKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
779 P851414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
780 P841414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
781 BAB68554/1-141 --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
782 gb|443893|124775/1-331 -MRFRFGVVVPPAVAGARPELLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
785 my $newaln = $aln->mask_columns(12,20,'?');
786 is( aln2str( $newaln, 'pfam' ), <<EOA, 'looks like correct masked alignment (from clustalw)' );
787 P84139/1-420 MNEGEHQIKLD?????????RKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
788 P814153/1-420 MNEGMHQIKLD?????????RKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
789 P851414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
790 P841414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
791 BAB68554/1-141 --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
792 gb|443893|124775/1-331 -MRFRFGVVVP?????????LLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
795 ###### test with phylip
797 my $phylip_str = <<EOF;
799 seq1 AAAATGGGGG TGGT------ GGTACCT--- -------
800 seq2 -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN
801 new AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN
805 my $phylip_masked = <<EOF;
807 seq1 AAAATGGGGG TGGT------ GGTACCT--- -------
808 seq2 -----GGCGG TGGT?????? GGTTCCCTNN NNNNNNN
809 new AAAATGGNGG TGGT?----? GGTNCCNTNN NNNNNNN
813 my $phy_fh = IO::String->new( $phylip_str );
815 my $in = Bio::AlignIO->new( -fh => $phy_fh, -format => 'phylip' );
818 $aln = $in->next_aln();
819 eq_or_diff( aln2str( $aln, 'phylip' ), $phylip_str );
821 $newaln = $aln->mask_columns(15,20,'?');
822 eq_or_diff( aln2str( $newaln,'phylip' ), $phylip_masked, 'align after looks ok' );
828 my ( $aln, $fmt ) = @_;
830 my $out_fh = IO::String->new( $out );
831 my $alignio_out = Bio::AlignIO->new(-fh => $out_fh, -format => $fmt);
832 $alignio_out->write_aln( $aln );