# [BUG] bug 2598
# [bioperl-live.git] / t / Index.t
# blob 47b9bacbbf71f9218a9443ce227c2896ad2c85c6
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

# Declare the test plan (64 tests, contingent on DB_File, Storable and Fcntl
# being available) and check that every module exercised below can be loaded.
BEGIN {
    use lib 't/lib';
    use BioperlTest;

    test_begin(-tests => 64,
               -requires_modules => [qw(DB_File
                                        Storable
                                        Fcntl)]);

    use_ok('Bio::Index::Fasta');
    use_ok('Bio::Index::Qual');
    use_ok('Bio::Index::SwissPfam');
    use_ok('Bio::Index::EMBL');
    use_ok('Bio::Index::GenBank');
    use_ok('Bio::Index::Swissprot');
    use_ok('Bio::DB::InMemoryCache');
}
# Build a Fasta index named 'Wibbl' from two test input files, then check
# that records can be fetched by ID and read back through a stream.
my $ind = Bio::Index::Fasta->new(-filename   => 'Wibbl',
                                 -write_flag => 1,
                                 -verbose    => 0);
$ind->make_index(test_input_file('multifa.seq'));
$ind->make_index(test_input_file('seqs.fas'));

# Either 'Wibbl' or 'Wibbl.pag' must exist on disk after indexing.
ok( (-e "Wibbl" || -e "Wibbl.pag"), 'Fasta index file exists' );

my $seq = $ind->fetch('HSEARLOBE');
is($seq->length, 321, 'HSEARLOBE length');
is($seq->primary_id(), 'HSEARLOBE', 'HSEARLOBE primary_id');

$seq = $ind->fetch('HSMETOO');
is($seq->length, 134, 'HSMETOO length');
is($seq->primary_id(), 'HSMETOO', 'HSMETOO primary_id');

$seq = $ind->fetch('MMWHISK');
is($seq->length, 62, 'MMWHISK length');
is($seq->primary_id(), 'MMWHISK', 'MMWHISK primary_id');

$seq = $ind->fetch('gi|238775|bbs|65126');
is($seq->length, 70, 'gi|238775|bbs|65126 length');

# The first object handed back by the index's sequence stream must
# implement Bio::PrimarySeqI.
my $stream = $ind->get_PrimarySeq_stream();
$seq = $stream->next_seq;
isa_ok $seq, 'Bio::PrimarySeqI';
# Index the 'multifa.seq.qual' test input with the Fasta indexer; only the
# existence of the resulting index file is checked.
$ind = Bio::Index::Fasta->new(-filename   => 'multifa_index',
                              -write_flag => 1,
                              -verbose    => 0);
$ind->make_index(test_input_file('multifa.seq.qual'));

ok( (-e "multifa_index"), 'multifa_index file exists' );

# Index the same input with the Qual indexer and fetch records by ID.
$ind = Bio::Index::Qual->new(-filename   => 'multifa_qual_index',
                             -write_flag => 1,
                             -verbose    => 0);
$ind->make_index(test_input_file('multifa.seq.qual'));

ok( (-e "multifa_qual_index"), 'multifa_qual_index file exists' );

# NOTE(review): $seq is not reassigned in this section before the next two
# checks, so they inspect whatever object it held on entry rather than a
# record fetched from the Qual index -- confirm this is intended.
ok( defined($seq), '$seq is defined' );
isa_ok $seq, 'Bio::SeqI';

$seq = $ind->fetch('HSEARLOBE');
is($seq->length, 321, 'HSEARLOBE length (Qual index)');
is($seq->primary_id(), 'HSEARLOBE', 'HSEARLOBE primary_id (Qual index)');

$seq = $ind->fetch('HSMETOO');
is($seq->length, 134, 'HSMETOO length (Qual index)');
is($seq->primary_id(), 'HSMETOO', 'HSMETOO primary_id (Qual index)');

$seq = $ind->fetch('MMWHISK');
is($seq->length, 62, 'MMWHISK length (Qual index)');
is($seq->primary_id(), 'MMWHISK', 'MMWHISK primary_id (Qual index)');

# Fetching an ID that was never indexed must yield undef.
$seq = $ind->fetch('NONEXISTENT_SEQ');
ok( !defined $seq, 'fetch of unknown ID returns undef' );
# SwissPfam index: only creation of the index file is checked.
$ind = Bio::Index::SwissPfam->new(-filename   => 'Wibbl2',
                                  -write_flag => 1);
$ind->make_index(test_input_file('swisspfam.data'));

ok( (-e "Wibbl2" || -e "Wibbl2.pag"), 'SwissPfam index file exists' );

# EMBL index: check the index file and the length of one fetched entry.
$ind = Bio::Index::EMBL->new(-filename   => 'Wibbl3',
                             -write_flag => 1);
$ind->make_index(test_input_file('test.embl'));
ok( (-e "Wibbl3" || -e "Wibbl3.pag"), 'EMBL index file exists' );
is($ind->fetch('AL031232')->length, 4870, 'AL031232 length');
# Swissprot index: the same entry must be reachable through both of the
# keys fetched below ('ROA1_HUMAN' and 'P09651').
$ind = Bio::Index::Swissprot->new(-filename   => 'Wibbl4',
                                  -write_flag => 1);
$ind->make_index(test_input_file('roa1.swiss'));
ok( (-e "Wibbl4" || -e "Wibbl4.pag"), 'Swissprot index file exists' );
$seq = $ind->fetch('ROA1_HUMAN');
is($seq->display_id(), 'ROA1_HUMAN', 'fetch by ROA1_HUMAN');
$seq = $ind->fetch('P09651');
is($seq->display_id(), 'ROA1_HUMAN', 'fetch by P09651');

# test id_parser
# Re-index the same file into the same index name with get_id installed as
# the id_parser, then fetch by a key that get_id extracts.
$ind = Bio::Index::Swissprot->new(-filename   => 'Wibbl4',
                                  -write_flag => 1);
$ind->id_parser(\&get_id);
$ind->make_index(test_input_file('roa1.swiss'));
ok( (-e "Wibbl4" || -e "Wibbl4.pag"),
    'Swissprot index file exists (custom id_parser)' );
$seq = $ind->fetch('X12671');
is($seq->length, 371, 'X12671 length via custom id_parser');
# GenBank index: fetch the same record by two different keys and check its
# length, species and sequence.
my $gb_ind = Bio::Index::GenBank->new(-filename   => 'Wibbl5',
                                      -write_flag => 1,
                                      -verbose    => 0);
$gb_ind->make_index(test_input_file('roa1.genbank'));
ok( (-e "Wibbl5" || -e "Wibbl5.pag"), 'GenBank index file exists' );
$seq = $gb_ind->fetch('AI129902');
is($seq->length, 37, 'AI129902 length');
is($seq->species->binomial, 'Homo sapiens', 'AI129902 species binomial');
$seq = $gb_ind->fetch(3598416);
is($seq->seq, "CTCCGCGCCAACTCCCCCCACCCCCCCCCCACACCCC", '3598416 sequence');

# Wrap the GenBank index in an in-memory cache and fetch through the cache.
my $cache = Bio::DB::InMemoryCache->new( -seqdb => $gb_ind );

ok( $cache->get_Seq_by_id('AI129902'), 'InMemoryCache returns AI129902' );
# Exercise Bio::DB::FileCache on top of the GenBank index. The same 11 checks
# run twice against the same cache file: first with -keep => 1, then with
# -keep => 0 (22 tests in total, skipped when Bio::DB::FileCache is missing).
SKIP: {
    test_skip(-tests => 22, -requires_module => 'Bio::DB::FileCache');

    for my $keep (1, 0) {
        # Drop the previous cache object before opening the next one on the
        # same file.
        $cache = undef;
        $cache = Bio::DB::FileCache->new(-seqdb => $gb_ind,
                                         -keep  => $keep,
                                         -file  => 'filecache.idx');

        my $seq = $cache->get_Seq_by_id('AI129902');
        ok( $seq, "FileCache (-keep => $keep) returns AI129902" );
        is( $seq->length, 37, "length (-keep => $keep)" );
        is( lc($seq->seq()), 'ctccgcgccaactccccccaccccccccccacacccc',
            "sequence (-keep => $keep)" );

        # Only the first feature of the record is inspected.
        my ( $f1 ) = $seq->get_SeqFeatures();
        is( ($f1->each_tag_value('sex'))[0], 'female',
            "feature tag 'sex' (-keep => $keep)" );
        is( ($f1->each_tag_value('lab_host'))[0], 'DH10B',
            "feature tag 'lab_host' (-keep => $keep)" );

        my $species = $seq->species;
        ok( $species, "species present (-keep => $keep)" );
        is( $species->binomial, 'Homo sapiens', "binomial (-keep => $keep)" );
        is( $species->species(), 'sapiens', "species (-keep => $keep)" );
        is( $species->genus(), 'Homo', "genus (-keep => $keep)" );

        # changes in GenBank file SOURCE line
        # this is now the abbreviated name
        ok( defined($species->name('abbreviated')),
            "abbreviated name defined (-keep => $keep)" );
        is( $species->name('abbreviated')->[0], 'human',
            "abbreviated name (-keep => $keep)" );
    }
}
# test id_parser
# Re-index the GenBank file into the same index name with get_id installed
# as the id_parser, then fetch by a key that get_id extracts.
$gb_ind = Bio::Index::GenBank->new(-filename   => 'Wibbl5',
                                   -write_flag => 1,
                                   -verbose    => 0);
$gb_ind->id_parser(\&get_id);
$gb_ind->make_index(test_input_file('roa1.genbank'));
ok( (-e "Wibbl5" || -e "Wibbl5.pag"),
    'GenBank index file exists (custom id_parser)' );
$seq = $gb_ind->fetch('alpha D-globin');
is($seq->length, 141, "'alpha D-globin' length via custom id_parser");
# Callback installed via id_parser() above: extracts a key from one line of
# text.
#
#   $line - the text to inspect (first argument)
#
# Returns the quoted value of a product="..." qualifier if the line contains
# one; otherwise the first ';'-delimited field following "DR   EMBL;" at the
# start of the line. When neither pattern matches there is no explicit
# return, so the caller receives a false value.
sub get_id {
    my $line = shift;
    return $1 if ($line =~ /product="([^"]+)"/);
    return $1 if ($line =~ /^DR\s+EMBL;\s+([^;]+)/);
}
# Run cleanup() as the interpreter exits so the index files created by this
# script are removed.
END {
    cleanup();
}
# Delete the index files created by this script from the current directory.
# For each index name, the bare name plus its '.pag' and '.dir' variants are
# removed if they exist. Takes no arguments.
sub cleanup {
    for my $root ( qw( Wibbl Wibbl2 Wibbl3 Wibbl4 Wibbl5
                       multifa_index multifa_qual_index ) ) {
        for my $file ( $root, "$root.pag", "$root.dir" ) {
            unlink $file if -e $file;
        }
    }
}