# Declare the test plan (115 tests); DB_File is listed as a required module.
7 test_begin(-tests => 115,
8 -requires_module => 'DB_File');
# Check that the modules exercised below can be loaded.
10 use_ok('Bio::Matrix::PSM::IO');
11 use_ok('Bio::DB::TFBS');
12 use_ok('Bio::DB::Taxonomy');
15 #*** need to test getting all ids of a certain kind, like $db->get_matrix_ids();
16 # but hard to do without a complete tax dump
# Build a flatfile taxonomy database from the bundled taxdump test inputs
# (nodes.dmp / names.dmp), using the directory returned by test_output_dir().
# NOTE(review): $tax_db is not referenced again in the visible lines; presumably
# it is passed to the TFBS constructor in arguments missing from this view --
# confirm against the complete file.
18 my $temp_dir = test_output_dir();
19 my $tax_db = Bio::DB::Taxonomy->new(-source => 'flatfile',
20 -directory => $temp_dir,
21 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
22 -namesfile => test_input_file('taxdump', 'names.dmp'));
24 # test transfac pro (local flat files)
# Create the TFBS database handle used by every test below; the index is
# written to the same temporary directory as the taxonomy database.
# NOTE(review): this constructor call is not closed within the visible lines;
# its remaining arguments and closing parenthesis appear to be missing here.
26 ok my $db = Bio::DB::TFBS->new(-source => 'transfac_pro',
27 -index_dir => $temp_dir,
28 -dat_dir => test_input_file('transfac_pro'),
# Reference tests: resolve a reference id from a PubMed id, fetch the
# reference object and check each of its fields.
34 ok my ($ref_id) = $db->get_reference_ids(-pubmed => 16574738);
35 is $ref_id, 'RE0047775';
36 ok my $ref = $db->get_reference($ref_id);
37 isa_ok $ref, 'Bio::Annotation::Reference';
38 is $ref->primary_id, 16574738;
39 is $ref->pubmed, $ref->primary_id;
40 is $ref->database, 'PUBMED';
# NOTE(review): the expected authors/location/title strings look scrambled;
# presumably the bundled test data was deliberately obfuscated -- confirm
# before "correcting" them.
41 is $ref->authors, '..Bet S . ,.u i rMeK ,,d. vWeWk KaS.ee.nyNk mJMMih. a, i P';
42 is $ref->location, 'Mc (o0o.. 0n)lnir.do 2E:6l';
43 is $ref->title, 'INDD VDGT C1AALEBEI.EIT IYIHLA6ITTE E ANV ITSL MTRTANYE TM NISP TNBAUTPOIORSL I- NVTOD,MHIRRLINSDX TRPY NO CAELUAOA SNMMNT CED5CTH NII TERTOI2IMTVPEH3DSAI';
# Cross-links between a reference and its sites/genes, checked in both
# directions (reference -> ids, then id -> reference).
45 my @sites = $db->get_site_ids(-reference => $ref_id);
46 is join(' ', sort @sites), 'R19310 R19311 R19312 R19313 R19314 R19315 R19316';
47 my @genes = $db->get_gene_ids(-reference => $ref_id);
48 is "@genes", 'G036757';
49 my @ref_ids = $db->get_reference_ids(-site => 'R19310');
50 is "@ref_ids", $ref_id;
51 @ref_ids = $db->get_reference_ids(-gene => 'G036757');
52 is "@ref_ids", $ref_id;
# Same round trip for matrices and factors, using a different reference.
# Results are sorted before comparison so the checks do not depend on the
# order in which ids are returned.
54 $ref_id = 'RE0047531';
55 my @matrices = $db->get_matrix_ids(-reference => $ref_id);
56 is join(' ', sort @matrices), 'M01123 M01124 M01125';
57 my @factors = $db->get_factor_ids(-reference => $ref_id);
58 like "@factors", qr/T08800/;
59 @ref_ids = $db->get_reference_ids(-matrix => 'M01123');
60 is join(' ', sort @ref_ids), "$ref_id RE0047626";
61 @ref_ids = $db->get_reference_ids(-factor => 'T08800');
62 is join(' ', sort @ref_ids), "$ref_id RE0047634 RE0047637 RE0047645";
# And for fragments: only membership of one known fragment id is checked.
64 $ref_id = 'RE0023998';
65 my %fragments = map { $_ => 1 } $db->get_fragment_ids(-reference => $ref_id);
66 ok $fragments{'FR0002267'};
67 @ref_ids = $db->get_reference_ids(-fragment => 'FR0002267');
68 is "@ref_ids", $ref_id;
# Gene tests: look up gene ids by name, id, species, site, factor and
# fragment, then follow the links from a gene back to sites, factors and
# fragments.
73 ok my ($gene_id) = $db->get_gene_ids(-name => 'P5');
74 is $gene_id, 'G000001';
76 #*** get_genemap with ensembl lookup being fantastically slow
77 #ok defined Bio::Map::Gene->set_from_db; # will try and do ensembl lookups for gene info
78 #ok my $gene_map = $db->get_genemap($gene_id, 1000);
79 #Bio::Tools::Run::Ensembl->_stats;
80 #ok $gene_map->isa('Bio::Map::GeneMap');
81 #ok $gene_map->unique_id, 'G000001';
82 #ok $gene_map->universal_name, 'P5';
83 #ok $gene_map->species->scientific_name, 'Adeno-associated virus';
84 #my @factors = grep { $_->isa("Bio::Map::TranscriptionFactor") } $gene_map->get_elements;
# The -id value is single-quoted so that '$P5' is a literal, not interpolated.
87 ($gene_id) = $db->get_gene_ids(-id => 'AAV$P5');
88 is $gene_id, 'G000001';
89 my @gene_ids = $db->get_gene_ids(-species => '9606');
91 is [sort @gene_ids]->[0], 'G000060'; # in real data this would be G000174, but since our taxdump doesn't have chicken in it, G000060 was changed to human
92 ($gene_id) = $db->get_gene_ids(-site => 'R03174');
93 is $gene_id, 'G000001';
94 ($gene_id) = $db->get_gene_ids(-factor => 'T00267');
95 is $gene_id, 'G000060';
96 my %gene_ids = map { $_ => 1 } $db->get_gene_ids(-fragment => 'FR0002267');
97 ok $gene_ids{'G020751'};
98 # get_gene_ids(-reference => ...) already tested
# Reverse direction: ids of other record types linked to a given gene.
100 my @site_ids = $db->get_site_ids(-gene => 'G000001');
101 is join(' ', sort @site_ids), 'R03174 R03175 R03176';
102 my @factor_ids = $db->get_factor_ids(-gene => 'G000060');
103 is join(' ', sort @factor_ids), 'T00267 T08293'; # only found for genes that encode factors
104 my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-gene => 'G020751');
105 ok $fragment_ids{'FR0002267'};
106 # get_reference_ids(-gene => ...) already tested
# Site tests: resolve a site id, fetch it as a sequence object and check its
# identifiers, sequence string, positional annotations and species.
111 ok my ($site_id) = $db->get_site_ids(-id => 'HS$IFI616_01');
112 is $site_id, 'R00001';
113 ok my $seq = $db->get_seq($site_id);
114 isa_ok $seq, 'Bio::Seq';
115 is $seq->id, 'HS$IFI616_01';
116 is $seq->accession_number, 'R00001';
117 is $seq->seq, 'aGAGACATAAGTgA';
# Each annotation key is fetched as a list and only its first entry is checked.
118 my $annot = $seq->annotation;
119 is [$annot->get_Annotations('relative_start')]->[0]->value, -172;
120 is [$annot->get_Annotations('relative_end')]->[0]->value, -98;
121 is [$annot->get_Annotations('relative_type')]->[0]->value, 'TSS';
122 is [$annot->get_Annotations('relative_to')]->[0]->value, 'G000176';
123 is $seq->species, 9606;
# NOTE(review): @site_ids (and @factor_ids below) are also declared with `my`
# earlier in this listing; presumably each test section sits in its own block
# whose braces are not visible in this view -- confirm, otherwise these
# declarations mask the earlier ones.
125 my @site_ids = $db->get_site_ids(-species => '9606');
127 is [sort @site_ids]->[0], 'R00001';
128 # get_site_ids(-gene => ...) already tested
129 ($site_id) = $db->get_site_ids(-matrix => 'M00972');
130 is $site_id, 'R00001';
131 my %site_ids = map { $_ => 1 } $db->get_site_ids(-factor => 'T00428');
132 ok $site_ids{R00001};
133 # get_site_ids(-reference => ...) already tested
135 # get_gene_ids(-site => ...) already tested
# Reverse direction: matrices and factors linked to the site.
136 my @matrix_ids = $db->get_matrix_ids(-site => 'R00001');
137 is "@matrix_ids", 'M00972';
138 my @factor_ids = $db->get_factor_ids(-site => 'R00001');
139 is "@factor_ids", 'T00428';
140 # get_reference_ids(-site => ...) already tested
# Matrix tests: resolve a matrix id, fetch the matrix object, then check its
# compressed frequencies, its SiteMatrixI accessors and its alignments.
145 ok my ($matrix_id) = $db->get_matrix_ids(-id => 'V$E47_01');
146 is $matrix_id, 'M00002';
147 ok my $matrix = $db->get_matrix($matrix_id);
148 isa_ok $matrix, 'Bio::Matrix::PSM::SiteMatrix';
152 # Let's try to compress and uncompress the frequencies, see if
153 # there is no considerable loss of data.
154 my $fA = $matrix->get_compressed_freq('A');
155 my @check = Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1);
156 my @A = $matrix->get_array('A');
# Compare the uncompressed values against the original 'A' row position by
# position, using absolute values on both sides.
# NOTE(review): in the visible lines $var is never updated and the loop has no
# closing brace; the accumulation step and closing lines appear to be missing
# from this view -- confirm against the complete file.
157 my ($var, $max) = (0, 0);
158 for (my $i = 0; $i < @check; $i++) {
159 my $diff = abs(abs($check[$i]) - abs($A[$i]));
161 $max = $diff if ($diff > $max);
163 my $avg = $var / @check;
164 cmp_ok $avg, '<', 0.01; # Loss of data under 1 percent
166 # SiteMatrixI methods
167 is $matrix->id, 'V$E47_01';
168 is $matrix->accession_number, $matrix_id;
169 is $matrix->consensus, 'ATGCATGCATGC';
170 is $matrix->IUPAC, 'NNNNNNNNNNNN';
171 is $matrix->regexp, '\S\S\S\S\S\S\S\S\S\S\S\S';
172 is $matrix->width, 12;
173 is $matrix->sites, 5;
# Alignment for the matrix: check its type, size and the ids of its sequences.
178 ok my $aln = $db->get_aln($matrix_id);
179 isa_ok $aln, 'Bio::SimpleAlign';
181 is $aln->num_residues, 132;
183 is $aln->num_sequences, 11;
# Expected ids are consumed in order as the alignment is walked alphabetically.
# NOTE(review): the closing brace of this foreach is not visible in this view.
184 my @ids = qw(R05108 R05109 R05110 R05111 R05112 R05113 R05114 R05115 R05116 R05117 R05118);
185 foreach my $seq ($aln->each_alphabetically) {
186 is $seq->id, shift(@ids);
# The three calls below are order-dependent: the second call stores sequences
# that the third call then expects to find.
189 ok ! $db->get_aln('M00001'); # no seqs in db
190 ok $aln = $db->get_aln('M00001', 1); # force to find seqs, store in db
191 ok $aln = $db->get_aln('M00001'); # seqs now in db
192 is $aln->num_sequences, 5;
194 ($matrix_id) = $db->get_matrix_ids(-name => 'MyoD');
195 is $matrix_id, 'M00001';
196 # get_matrix_ids(-site => ...) already tested
197 my %matrix_ids = map { $_ => 1 } $db->get_matrix_ids(-factor => 'T00526');
198 ok $matrix_ids{M00001};
199 # get_matrix_ids(-reference => ...) already tested
201 # get_site_ids(-matrix => ...) already tested
202 my @factor_ids = $db->get_factor_ids(-matrix => 'M00001');
203 is join(' ', sort @factor_ids), 'T00526 T09177';
204 # get_reference_ids(-matrix => ...) already tested
# Fragment tests: resolve a fragment id and fetch it as a sequence object;
# the sequence string and species are asserted on the lines that follow.
209 ok my ($fragment_id) = $db->get_fragment_ids(-id => 'FR0002267');
210 is $fragment_id, 'FR0002267'; # id and accession are the same for fragments
211 ok my $seq = $db->get_fragment($fragment_id);
212 isa_ok $seq, 'Bio::SeqI';
213 is $seq->id, 'FR0002267';
214 is $seq->seq, 'GTCTACAACACTCTTGCGGACGGAGAGCCGAAGAGCAAAGCGTCGCCGGGTAAGACGAACGCTCAAGGGGGTACGAGCAGCGTAACGACGGAAACGGTGACGCCCCGGGATTTGGGGCTCAGCTAGGGTCGCCGAGTAGGGGGCCGCGGGGACAACGGGGGCGACACGCCGCTTTCCCTGCGTCTGTGGAGCCTATGGTACGGCGTAACCGGTTGTGTGATGAACTGTCCAGACCGCACGTAGTCCCAGCGCAAGGTCTATGCCGCCTAGAGGCAAGACGGGCCGTCTCCTACTTAGTAGCCAGCTACGGGGCGTTGGTCCCCTCGGTAGTGCAACTATCCAGCCACGGCGTCCGCCGGGCTGAGCCTCAGCAGAGCTGGGGGGGTATCATTCCGACGCTGTTTAATTCGTCAGCAGGACCCACTACACGCTCTGTCATTCGCCTGAGCAGTTGTAAATTAGCGCGGCGATCTTGCAAGAGACAAGGAGGCGAACCTGGGGTCGGGACGTAAGGACGAACGGCAGTACAGACGCTGGGGGACGCCACGTGCCAGAACCTCTCACGACCGGAGGTTCAACGCTGATTGGGGCGCAACAGAGGGCGGAGCAGCGAGGTGGCGCTGGTGGGATGGGGCGAGACAAACCCAAGCTGACGCCGAAGGGCCCGCGTGGCCGGGCTGGGGCCCGTAGAACGAGGGAATTGTATGCGGCGCCTGAATGGGCGCACCACA';
# Species of the fragment sequence fetched above.
215 is $seq->species, 9606;
217 # -id -species -gene -factor -reference
# Fragment id lookups by species and by factor; the species check only looks
# at the first id after sorting, the factor check only at membership.
218 my @fragment_ids = $db->get_fragment_ids(-species => '9606');
220 is [sort @fragment_ids]->[0], 'FR0000001';
221 my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-factor => 'T03828');
222 ok $fragment_ids{'FR0002267'};
223 # get_fragment_ids(-gene => ...) already tested
224 # get_fragment_ids(-reference => ...) already tested
# Reverse direction: the factor linked to the fragment.
226 my ($factor_id) = $db->get_factor_ids(-fragment => 'FR0002267');
227 is $factor_id, 'T03828';
228 # get_gene_ids(-fragment => ...) already tested
229 # get_reference_ids(-fragment => ...) already tested
# Factor tests: resolve a factor id, fetch the factor object and check its
# id, name and map count, then look factors up by name, species and
# interactor.
234 ok my ($factor_id) = $db->get_factor_ids(-id => 'T00001');
235 is $factor_id, 'T00001'; # id and accession are the same for factors
236 ok my $factor = $db->get_factor($factor_id);
237 isa_ok $factor, 'Bio::Map::TranscriptionFactor';
238 is $factor->id, 'T00001';
239 is $factor->universal_name, 'AAF';
240 is $factor->known_maps, 1;
# NOTE(review): @positions is not used in the visible lines; the assertions on
# it appear to be missing from this view -- confirm against the complete file.
241 my @positions = $factor->get_positions;
244 ($factor_id) = $db->get_factor_ids(-name => 'AAF');
245 is $factor_id, 'T00001';
# Only the first id after a string sort is checked for each of these lookups.
246 my @factor_ids = $db->get_factor_ids(-species => '9606');
248 is [sort @factor_ids]->[0], 'T00001';
249 @factor_ids = $db->get_factor_ids(-interactors => 'T03200');
250 is [sort @factor_ids]->[0], 'T00002';
251 # get_factor_ids(-gene => ...) already tested
252 # get_factor_ids(-site => ...) already tested
253 # get_factor_ids(-matrix => ...) already tested
254 # get_factor_ids(-fragment => ...) already tested
255 # get_factor_ids(-reference => ...) already tested
257 # get_*_ids(-factor => ...) already tested
261 # how to get something like ok $psmIO->release, '10.2--2006-06-30'; ?
262 # or all factors, all sites, all matrices, all genes etc.?