tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / t / Assembly / Assembly.t
blob 69bf87e587ee86fe123425135e2e01dca8dcf501
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;
use warnings;

# Harness setup, run at compile time: declare a plan of 51 tests, name
# DB_File as a required module, and check that the module under test loads.
BEGIN {
    use lib '.';
    use Bio::Root::Test;

    test_begin(
        -tests           => 51,
        -requires_module => 'DB_File',
    );

    use_ok('Bio::Assembly::IO');
}
# Testing IO

# Some PHRAP input

my $in = Bio::Assembly::IO->new(
    -file => test_input_file('consed_project', 'edit_dir', 'test_project.phrap.out'),
);

isa_ok($in, 'Bio::Assembly::IO');

my $sc;

# Known failure: parse once with verbosity 2 and record whether
# next_assembly() threw. The assertion is named so the TODO entry is
# identifiable in the TAP output.
TODO: {
    local $TODO = "phrap parser doesn't include the sequence string in the sequence objects.";
    $in->verbose(2);
    eval { $sc = $in->next_assembly };
    ok(!$@, 'next_assembly() on PHRAP input does not throw at verbose(2)');
}

# Re-open the same file and parse it again with verbosity -1; the scaffold
# read here is the one the Scaffold tests inspect.
$in = Bio::Assembly::IO->new(
    -file => test_input_file('consed_project', 'edit_dir', 'test_project.phrap.out'),
);

$in->verbose(-1);

$sc = $in->next_assembly;

isa_ok($sc, 'Bio::Assembly::Scaffold');
# Testing Scaffold

# Read the id, then set it and check the value the setter hands back.
is($sc->id,         'NoName');
is($sc->id('test'), 'test');

# The scaffold exposes an annotation collection with no keys in it.
isa_ok($sc->annotation, 'Bio::AnnotationCollectionI');
is(scalar($sc->annotation->get_all_annotation_keys), 0, 'no annotations in Annotation collection?');

# Element counts reported by the scaffold.
is($sc->get_nof_contigs,              1);
is($sc->get_nof_sequences_in_contigs, 2);
is($sc->get_nof_singlets,             2, 'get_nof_singlets');

# The id accessors are compared in scalar context (as Test::More's is()
# already evaluates its arguments).
is(scalar($sc->get_contig_seq_ids), 2, 'get_contig_seq_ids');
is(scalar($sc->get_contig_ids),     1, 'get_contig_ids');
is(scalar($sc->get_singlet_ids),    2, 'get_singlet_ids');
65 # Testing Contig
69 # Testing ContigAnalysis
# Testing Ace

my $aio = Bio::Assembly::IO->new(
    -file   => test_input_file('consed_project', 'edit_dir', 'test_project.fasta.screen.ace.2'),
    -format => 'ace',
);

my $assembly = $aio->next_assembly();

my @contigs = $assembly->all_contigs();

my $direction = $contigs[0]->strand;
is($direction, 1);

my $features        = $contigs[0]->get_features_collection;
my @contig_features = $features->get_all_features;
is(scalar(@contig_features), 8);

my @annotations = grep { $_->primary_tag eq 'Annotation' } @contig_features;
is(scalar(@annotations), 2);

# For every 'Annotation' feature, look for the extra_info tag first and the
# comment tag second; compare the first value of whichever tag is found and
# count the hit.
my $had_tag = 0;
ANNOTATION:
for my $annotation (@annotations) {
    for my $expected (
        [ extra_info => "contig extra\ninfo\n" ],
        [ comment    => "contig tag\ncomment\n" ],
    ) {
        my ($tag, $value) = @{$expected};
        next unless $annotation->has_tag($tag);

        $had_tag++;
        is(($annotation->get_tag_values($tag))[0], $value);
        next ANNOTATION;
    }
}
is($had_tag, 2);

# Element counts reported by the assembly.
is($assembly->get_nof_contigs,              1);
is($assembly->get_nof_sequences_in_contigs, 2);
is($assembly->get_nof_singlets,             0, 'get_nof_singlets');
is(scalar($assembly->get_contig_seq_ids),   2, 'get_contig_seq_ids');
is(scalar($assembly->get_contig_ids),       1, 'get_contig_ids');
is(scalar($assembly->get_singlet_ids),      0, 'get_singlet_ids');
# Ace input that contains singlets as well as contigs.
$aio = Bio::Assembly::IO->new(
    -file   => test_input_file('assembly_with_singlets.ace'),
    -format => 'ace',
);
$assembly = $aio->next_assembly();

is($assembly->get_nof_contigs, 3, 'get_nof_contigs');
my @ace_contigs = $assembly->all_contigs();
# The third argument of isa_ok() is the name of the object under test;
# Test::More appends the "isa <class>" part itself.
isa_ok($ace_contigs[0], 'Bio::Assembly::Contig', 'the contig');

is($assembly->get_nof_sequences_in_contigs, 6, 'get_nof_sequences_in_contigs');
is($assembly->get_nof_singlets, 33, 'get_nof_singlets');
my @ace_singlets = $assembly->all_singlets();
# The first singlet is checked against both classes.
isa_ok($ace_singlets[0], 'Bio::Assembly::Contig',  'the singlet');
isa_ok($ace_singlets[0], 'Bio::Assembly::Singlet', 'the singlet');

is($assembly->get_contig_seq_ids, 6,  'get_contig_seq_ids');
is($assembly->get_contig_ids,     3,  'get_contig_ids');
is($assembly->get_singlet_ids,    33, 'get_singlet_ids');
# Testing TIGR format

# Importing an assembly

# The file name is given without a trailing blank, so that locating the
# fixture does not depend on how the IO layer treats surrounding whitespace.
my $asm_in = Bio::Assembly::IO->new(
    -file   => test_input_file('sample_dataset.tasm'),
    -format => 'tigr',
);
my $scaf_in = $asm_in->next_assembly;

isa_ok($scaf_in, 'Bio::Assembly::Scaffold');
is($scaf_in->id, 'NoName');
is($scaf_in->get_nof_contigs, 13);
is($scaf_in->get_nof_sequences_in_contigs, 36);
is($scaf_in->get_nof_singlets, 0);

# Expected ids. Both lists use the default (string) sort, the same ordering
# applied to the values returned by the scaffold below.
my @seqids = sort qw(sdsu|SDSU1_RFPERU_001_A09.x01.phd.1
sdsu|SDSU1_RFPERU_001_B03.x01.phd.1 sdsu|SDSU1_RFPERU_001_B04.x01.phd.1
sdsu|SDSU1_RFPERU_001_E04.x01.phd.1 sdsu|SDSU_RFPERU_002_A01.x01.phd.1
sdsu|SDSU_RFPERU_002_B07.x01.phd.1 sdsu|SDSU_RFPERU_002_C12.x01.phd.1
sdsu|SDSU_RFPERU_002_D08.x01.phd.1 sdsu|SDSU_RFPERU_002_H12.x01.phd.1
sdsu|SDSU_RFPERU_003_G09.x01.phd.1 sdsu|SDSU_RFPERU_004_H12.x01.phd.1
sdsu|SDSU_RFPERU_005_F02.x01.phd.1 sdsu|SDSU_RFPERU_006_D03.x01.phd.1
sdsu|SDSU_RFPERU_006_E04.x01.phd.1 sdsu|SDSU_RFPERU_006_E05.x01.phd.1
sdsu|SDSU_RFPERU_006_H08.x01.phd.1 sdsu|SDSU_RFPERU_007_E09.x01.phd.1
sdsu|SDSU_RFPERU_007_F06.x01.phd.1 sdsu|SDSU_RFPERU_008_B02.x01.phd.1
sdsu|SDSU_RFPERU_009_E07.x01.phd.1 sdsu|SDSU_RFPERU_010_B05.x01.phd.1
sdsu|SDSU_RFPERU_010_B06.x01.phd.1 sdsu|SDSU_RFPERU_010_C09.x01.phd.1
sdsu|SDSU_RFPERU_010_D10.x01.phd.1 sdsu|SDSU_RFPERU_012_H02.x01.phd.1
sdsu|SDSU_RFPERU_013_B05.x01.phd.1 sdsu|SDSU_RFPERU_013_C07.x01.phd.1
sdsu|SDSU_RFPERU_013_C08.x01.phd.1 sdsu|SDSU_RFPERU_013_G10.x01.phd.1
sdsu|SDSU_RFPERU_013_H05.x01.phd.1 sdsu|SDSU_RFPERU_014_H06.x01.phd.1
sdsu|SDSU_RFPERU_015_A05.x01.phd.1 sdsu|SDSU_RFPERU_015_C06.x01.phd.1
sdsu|SDSU_RFPERU_015_E04.x01.phd.1 sdsu|SDSU_RFPERU_015_G04.x01.phd.1
sdsu|SDSU_RFPERU_015_H03.x01.phd.1);
my @contigids = sort qw(106 144 148 17 185 2 210 36 453 500 613 668 93);
is_deeply([sort $scaf_in->get_contig_seq_ids], \@seqids);
is_deeply([sort $scaf_in->get_contig_ids], \@contigids);
is_deeply([$scaf_in->get_singlet_ids], []);
isa_ok($scaf_in->get_seq_by_id('sdsu|SDSU1_RFPERU_001_A09.x01.phd.1'), 'Bio::LocatableSeq');
my $contig = $scaf_in->get_contig_by_id('106');
isa_ok($contig, 'Bio::Assembly::Contig');
# check Contig object SeqFeature::Collection
# should add more specific Contig tests...
my @sfs = $contig->get_features_collection->get_all_features;
is(scalar(@sfs), 5);
my %primary_tags = map { $_->primary_tag => 1 } @sfs;
ok exists $primary_tags{'_aligned_coord:sdsu|SDSU_RFPERU_006_E04.x01.phd.1'};
is($sfs[1]->seq_id(), undef); # should this be undef?

isa_ok($scaf_in->annotation, 'Bio::AnnotationCollectionI');
is($scaf_in->annotation->get_all_annotation_keys, 0, "no annotations in Annotation collection?");
# Exporting an assembly

# Write the scaffold imported above to a file obtained from
# test_output_file(), using the TIGR writer; the leading '>' in the -file
# value is kept exactly as before.
my $asm_outfile = test_output_file();
my $asm_out     = Bio::Assembly::IO->new(
    -file   => '>' . $asm_outfile,
    -format => 'tigr',
);

ok($asm_out->write_assembly(-scaffold => $scaf_in));