5 test_begin( -tests => 107,
6 -requires_modules => [qw(Bio::DB::Fasta Bio::SeqIO)] );
12 my $DEBUG = test_debug();
15 # Test Bio::DB::Fasta, but also the underlying module, Bio::DB::IndexedBase
17 my $test_dir = setup_temp_dir('dbfa');
18 my $test_file = test_input_file('dbfa', 'mixed_alphabet.fasta');
20 test_input_file('dbfa', 'mixed_alphabet.fasta'),
21 test_input_file('dbfa', '6.fa')
26 # Test basic functionalities
27 ok my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1), 'Index a directory';
28 is $db->glob, '*.{fa,FA,fasta,FASTA,fast,FAST,dna,DNA,fna,FNA,faa,FAA,fsa,FSA}';
29 isa_ok $db, 'Bio::DB::Fasta';
30 is $db->length('CEESC13F'), 389;
31 is $db->seq('CEESC13F:1,10'), 'cttgcttgaa';
32 is $db->seq('CEESC13F:1-10'), 'cttgcttgaa';
33 is $db->seq('CEESC13F:1..10'), 'cttgcttgaa';
34 is $db->seq('CEESC13F:1..10/1'), 'cttgcttgaa';
35 is $db->seq('CEESC13F:1..10/+1'), 'cttgcttgaa';
36 is $db->seq('CEESC13F:1..10/-1'), 'ttcaagcaag';
37 is $db->seq('CEESC13F/1'), 'cttgcttgaaaaatttatataaatatttaagagaagaaaaataaataatcgcatctaatgacgtctgtccttgtatccctggtttccattgactggtgcactttcctgtctttgaggacatggacaatattcggcatcagttcctggctctccctcctctcctggtgctccagcagaaccgttctctccattatctcccttgtctccacgtggtccacgctctcctggtgctcctggaataccttgagctccctcgtgccgaattcctgcagcccgggggatccactagttctagagcggccgccaccgcggtgggagctccagcttttgttncctttagtgagggttaatttcgagcttggcgtaatcatggtcatagctgtttcctg';
38 is $db->seq('CEESC13F/-1'), 'caggaaacagctatgaccatgattacgccaagctcgaaattaaccctcactaaaggnaacaaaagctggagctcccaccgcggtggcggccgctctagaactagtggatcccccgggctgcaggaattcggcacgagggagctcaaggtattccaggagcaccaggagagcgtggaccacgtggagacaagggagataatggagagaacggttctgctggagcaccaggagaggagggagagccaggaactgatgccgaatattgtccatgtcctcaaagacaggaaagtgcaccagtcaatggaaaccagggatacaaggacagacgtcattagatgcgattatttatttttcttctcttaaatatttatataaatttttcaagcaag';
39 is $db->seq('AW057119', 1, 10), 'tcatgttggc';
40 is $db->seq('AW057119', 1, 10, 1), 'tcatgttggc';
41 is $db->seq('AW057119', 1, 10, -1), 'gccaacatga';
42 is $db->seq('AW057119', 10, 1), 'gccaacatga';
43 is $db->seq('AW057119', 10, 1, -1), 'tcatgttggc';
44 is $db->header('AW057119'), 'AW057119 test description';
45 is $db->seq('foobarbaz'), undef;
46 is $db->get_Seq_by_id('foobarbaz'), undef;
47 is $db->file('AW057119'), '1.fa';
48 is $db->file('AW057410'), '3.fa';
49 is $db->file('CEESC13F'), '6.fa';
51 # Bio::DB::RandomAccessI and Bio::DB::SeqI methods
52 ok my $primary_seq = $db->get_Seq_by_id('AW057119');
53 ok $primary_seq = $db->get_Seq_by_acc('AW057119');
54 ok $primary_seq = $db->get_Seq_by_version('AW057119');
55 ok $primary_seq = $db->get_Seq_by_primary_id('AW057119');
56 isa_ok $primary_seq, 'Bio::PrimarySeq::Fasta';
57 isa_ok $primary_seq, 'Bio::PrimarySeqI';
59 # Bio::PrimarySeqI methods
60 is $primary_seq->id, 'AW057119';
61 is $primary_seq->display_id, 'AW057119';
62 like $primary_seq->primary_id, qr/^Bio::PrimarySeq::Fasta=HASH/;
63 is $primary_seq->alphabet, 'dna';
64 is $primary_seq->accession_number, 'unknown';
65 is $primary_seq->is_circular, undef;
66 is $primary_seq->subseq(11, 20), 'ttctcggggt';
67 is $primary_seq->description, 'test description', 'bug 3126';
68 is $primary_seq->seq, 'tcatgttggcttctcggggtttttatggattaatacattttccaaacgattctttgcgccttctgtggtgccgccttctccgaaggaactgacgaaaaatgacgtggatttgctgacaaatccaggcgaggaatatttggacggattgatgaaatggcacggcgacgagcgacccgtgttcaaaagagaggacatttatcgttggtcggatagttttccagaatatcggctaagaatgatttgtctgaaagacacgacaagggtcattgcagtcggtcaatattgttactttgatgctctgaaagaaaggagagcagccattgttcttcttaggattgggatggacggatcctgaatatcgtaatcgggcagttatggagcttcaagcttcgatggcgctggaggagagggatcggtatccgactgccaacgcggcatcgcatccaaataagttcatgaaacgattttggcacatattcaacggcctcaaagagcacgaggacaaaggtcacaaggctgccgctgtttcatacaagagcttctacgacctcanagacatgatcattcctgaaaatctggatgtcagtggtattactgtaaatgatgcacgaaaggtgccacaaagagatataatcaactacgatcaaacatttcatccatatcatcgagaaatggttataatttctcacatgtatgacaatgatgggtttggaaaagtgcgtatgatgaggatggaaatgtacttggaattgtctagcgatgtctttanaccaacaagactgcacattagtcaattatgcagatagcc';
69 ok my $trunc = $primary_seq->trunc(11,20);
70 isa_ok $trunc, 'Bio::PrimarySeq::Fasta';
71 isa_ok $trunc, 'Bio::PrimarySeqI';
72 is $trunc->length, 10;
73 is $trunc->seq, 'ttctcggggt';
74 ok my $rev = $trunc->revcom;
75 isa_ok $rev, 'Bio::PrimarySeq::Fasta';
76 isa_ok $rev, 'Bio::PrimarySeqI';
77 is $rev->seq, 'accccgagaa';
83 # Re-open an existing index.
84 # Doing this test properly involves unloading and reloading Bio::DB::Fasta.
87 test_skip(-tests => 1, -requires_modules => [qw(Class::Unload)]);
88 use_ok('Class::Unload');
89 Class::Unload->unload( 'Bio::DB::Fasta' );
90 Class::Unload->unload( 'Bio::DB::IndexedBase' );
91 require Bio::DB::Fasta;
94 ok my $db = Bio::DB::Fasta->new($test_dir), 'Re-open an existing index';
95 is $db->seq('AW057119', 1, 10), 'tcatgttggc';
100 # Test tied hash access
102 ok tie(%h, 'Bio::DB::Fasta', $test_dir), 'Tied hash access';
103 ok exists $h{'AW057146'};
104 is $h{'AW057146:1,10'} , 'aatgtgtaca'; # in file 1.fa
105 is $h{'AW057146:10,1'} , 'tgtacacatt'; # reverse complement
106 is $h{'AW057443:11,20'}, 'gaaccgtcag'; # in file 4.fa
111 # Test writing the Bio::PrimarySeq::Fasta objects with SeqIO
112 ok my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1), 'Writing with SeqIO';
113 my $out = Bio::SeqIO->new(
114 -format => 'genbank',
115 -file => '>'.test_output_file()
117 my $primary_seq = Bio::Seq->new(-primary_seq => $db->get_Seq_by_acc('AW057119'));
119 $out->write_seq($primary_seq)
123 $out = Bio::SeqIO->new(-format => 'embl', -file => '>'.test_output_file());
125 $out->write_seq($primary_seq)
132 # Test alphabet and reverse-complement RNA
133 ok my $db = Bio::DB::Fasta->new( $test_file, -reindex => 1), 'Index a single file';
134 is $db->alphabet('gi|352962132|ref|NG_030353.1|'), 'dna';
135 is $db->alphabet('gi|352962148|ref|NM_001251825.1|'), 'rna';
136 is $db->alphabet('gi|194473622|ref|NP_001123975.1|'), 'protein';
137 is $db->alphabet('gi|61679760|pdb|1Y4P|B'), 'protein';
138 is $db->alphabet('123'), '';
139 is $db->seq('gi|352962148|ref|NM_001251825.1|', 20, 29, 1), 'GUCAGCGUCC';
140 is $db->seq('gi|352962148|ref|NM_001251825.1|', 20, 29, -1), 'GGACGCUGAC';
142 # Test empty sequence
143 is $db->seq('123'), '';
145 is $db->file('gi|352962132|ref|NG_030353.1|'), 'mixed_alphabet.fasta';
151 ok my $db = Bio::DB::Fasta->new( $test_file, -reindex => 1);
152 ok my $stream = $db->get_PrimarySeq_stream;
153 isa_ok $stream, 'Bio::DB::Indexed::Stream';
155 while (my $seq = $stream->next_seq) {
160 # ActivePerl will not allow deletion if the tie-hash is still active
162 # Strawberry Perl temporary file
163 unlink "$test_file.index" if -e "$test_file.index";
164 # ActivePerl temporary files
165 unlink "$test_file.index.dir" if -e "$test_file.index.dir";
166 unlink "$test_file.index.pag" if -e "$test_file.index.pag";
171 # Concurrent databases (bug #3390)
172 ok my $db1 = Bio::DB::Fasta->new( test_input_file('dbfa', '1.fa') );
173 ok my $db3 = Bio::DB::Fasta->new( test_input_file('dbfa', '3.fa') );
174 ok my $db4 = Bio::DB::Fasta->new( $test_dir );
175 ok my $db2 = Bio::DB::Fasta->new( test_input_file('dbfa', '2.fa') );
176 is $db4->file('AW057231'), '1.fa';
177 is $db2->file('AW057302'), '2.fa';
178 is $db4->file('AW057119'), '1.fa';
179 is $db3->file('AW057336'), '3.fa';
180 is $db1->file('AW057231'), '1.fa';
181 is $db4->file('AW057410'), '3.fa';
183 # ActivePerl will not allow deletion if the tie-hash is still active
187 # Strawberry Perl temporary file
188 unlink $db1->index_name if -e $db1->index_name;
189 unlink $db2->index_name if -e $db2->index_name;
190 unlink $db3->index_name if -e $db3->index_name;
191 # ActivePerl temporary files
192 unlink $db1->index_name().'.dir' if -e $db1->index_name().'.dir';
193 unlink $db2->index_name().'.dir' if -e $db2->index_name().'.dir';
194 unlink $db3->index_name().'.dir' if -e $db3->index_name().'.dir';
195 unlink $db1->index_name().'.pag' if -e $db1->index_name().'.pag';
196 unlink $db2->index_name().'.pag' if -e $db2->index_name().'.pag';
197 unlink $db3->index_name().'.pag' if -e $db3->index_name().'.pag';
202 # Test an arbitrary index filename and cleaning
203 my $name = 'arbitrary.idx';
204 ok my $db = Bio::DB::Fasta->new( $test_file,
205 -reindex => 1, -index_name => $name, -clean => 1,
207 is $db->index_name, $name;
209 # Tied-hash in Strawberry Perl produce $name,
210 # while in ActivePerl produce "$name.dir" and "$name.pag"
211 if (-e "$name.pag") {
213 # ActivePerl will not allow deletion if the tie-hash is still active
215 unlink "$name.dir" if -e "$name.dir";
216 unlink "$name.pag" if -e "$name.pag";
221 # ActivePerl will not allow deletion if the tie-hash is still active
223 unlink $name if -e $name;
232 ok my $db = Bio::DB::Fasta->new( $test_file,
233 -reindex => 1, -clean => 1, -makeid => \&extract_gi,
235 is_deeply [sort $db->get_all_primary_ids], ['', 194473622, 352962132, 352962148, 61679760];
236 is $db->get_Seq_by_id('gi|352962148|ref|NM_001251825.1|'), undef;
237 isa_ok $db->get_Seq_by_id(194473622), 'Bio::PrimarySeqI';
242 # Test makeid that generates several IDs, bug #3389
243 ok my $db = Bio::DB::Fasta->new( $test_file,
244 -reindex => 1, -clean => 1, -makeid => \&extract_gi_and_ref,
245 ), 'Make multiple IDs, bug #3389';
246 is_deeply [sort $db->get_all_primary_ids], ['', 194473622, 352962132, 352962148, 61679760, 'NG_030353.1', 'NM_001251825.1', 'NP_001123975.1'];
247 is $db->get_Seq_by_id('gi|352962148|ref|NM_001251825.1|'), undef;
248 isa_ok $db->get_Seq_by_id('NG_030353.1'), 'Bio::PrimarySeqI';
253 # Test opening set of files and test IDs
254 ok my $db = Bio::DB::Fasta->new( $test_files, -reindex => 1), 'Index a set of files';
257 my @ids = sort $db->get_all_primary_ids();
258 is_deeply \@ids, [ qw(
297 gi|194473622|ref|NP_001123975.1|
298 gi|352962132|ref|NG_030353.1|
299 gi|352962148|ref|NM_001251825.1|
300 gi|61679760|pdb|1Y4P|B
302 like $db->index_name, qr/^fileset_.+\.index$/;
304 my $index = $db->index_name;
305 # ActivePerl will not allow deletion if the tie-hash is still active
307 # Strawberry Perl temporary file
308 unlink $index if -e $index;
309 # ActivePerl temporary files
310 unlink "$index.dir" if -e "$index.dir";
311 unlink "$index.pag" if -e "$index.pag";
316 # Squash warnings locally
317 local $SIG{__WARN__} = sub {};
320 my $test_dir = setup_temp_dir('bad_dbfa');
321 throws_ok {my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1)}
322 qr/FASTA header doesn't match/;
325 # Empty lines within a sequence is bad...
326 throws_ok {my $db = Bio::DB::Fasta->new(test_input_file('badfasta.fa'), -reindex => 1)}
327 qr/Blank lines can only precede header lines/;
333 # but empty lines preceding headers are okay, but let's check the seqs just in case
335 lives_ok {$db = Bio::DB::Fasta->new(test_input_file('spaced_fasta.fa'), -reindex => 1)};
336 is length($db->seq('CEESC39F')), 375, 'length is correct in sequences past spaces';
337 is length($db->seq('CEESC13F')), 389;
339 is $db->subseq('CEESC39F', 51, 60) , 'acatatganc', 'subseq is correct';
340 is $db->subseq('CEESC13F', 146, 155), 'ggctctccct', 'subseq is correct';
342 # Remove temporary test file
343 my $outfile = test_input_file('spaced_fasta.fa').'.index';
345 # ActivePerl will not allow deletion if the tie-hash is still active
347 # Strawberry Perl temporary file
348 unlink $outfile if -e $outfile;
349 # ActivePerl temporary files
350 unlink "$outfile.dir" if -e "$outfile.dir";
351 unlink "$outfile.pag" if -e "$outfile.pag";
358 # Extract GI from RefSeq
360 my ($id) = ($header =~ /gi\|(\d+)/m);
365 sub extract_gi_and_ref {
366 # Extract GI and from RefSeq
368 my ($gi) = ($header =~ /gi\|(\d+)/m);
370 my ($ref) = ($header =~ /ref\|([^|]+)/m);
377 # this obfuscation is to deal with lockfiles by GDBM_File which can
378 # only be created on local filesystems apparently so will cause test
379 # to block and then fail when the testdir is on an NFS mounted system
381 my $data_dir = shift;
383 my $io = Bio::Root::IO->new();
384 my $tempdir = test_output_dir();
385 my $test_dir = $io->catfile($tempdir, $data_dir);
386 mkdir($test_dir); # make the directory
387 my $indir = test_input_file($data_dir);
388 opendir(my $INDIR,$indir) || die("cannot open dir $indir");
389 # effectively do a cp -r but only copy the files that are in there, no subdirs
390 for my $file ( map { $io->catfile($indir,$_) } readdir($INDIR) ) {
391 next unless (-f $file );
392 copy($file, $test_dir);