1 # -*-Perl-*- Test Harness script for Bioperl
# test_begin is given the planned number of tests (1422) and names
# Bio::ASN1::EntrezGene via -requires_module; presumably the tests are
# skipped when that module is unavailable -- confirm against test_begin.
10 test_begin(-tests => 1422,
11 -requires_module => 'Bio::ASN1::EntrezGene');
# Species names; each record's binomial must match exactly one entry.
16 my @species=('Homo sapiens','Mus musculus', 'Caenorhabditis elegans');
# Medline ids; each Reference annotation checked in T12 must match exactly one.
17 my @pubmed=qw(15461460 15221005 14702039 12477932 8889549 3610142 3458201 2591067);
# Expected gene-symbol aliases keyed by accession number.  T7 looks up the
# list for the current record's accession and requires every non-empty
# ALIAS_SYMBOL annotation to match exactly one entry of that list.
59 my %asym=(1=>['A1B', 'ABG', 'GAB', 'HYST2477', 'DKFZp686F0970'],
60 2=>['FWP007','S863-7','DKFZp779B086'], 4=>['A12M1'], 5=>['A12M2'],6=>['A12M3'],7=>['A12M4'],
61 9=>['AAC1'],10=>['AAC2'],11=>['NATP'],
62 12=>['ACT','AACT','MGC88254'],13=>['DAC'],15=>['SNAT','AA-NAT'],
64 11287=>['A1m','A2m','MAM'],
65 11298=>['Nat4','SNAT','Nat-2'],
66 11302=>['AATYK','mKIAA0641'],11303=>['Abc1'],
67 11304=>['RmP','Abcr','Abc10','D430003I15Rik'],
68 11305=>['Abc2','mKIAA1062','D2H0S1474E'],
70 171590=>['Y74C9A.3','CELK05052'],
71 171591=>['Y74C9A.2','CELK01753'],
72 171592=>['Y74C9A.4a','Y74C9A.4b','CELK08126'],
73 171593=>['Y74C9A.5','CELK09643'],
74 171594=>['Y48G1C.4','CELK05819']);
# Expected loop counts, one array ref per record, selected by $num_of_seqs.
# The values of a row are shifted off in order as the test sections
# (T3, T4, ...) finish and are compared with that section's loop counter,
# so the order of the numbers in a row must follow the order of the sections.
# Rows differ in length.
116 my @loop_counts = ([1,1,1,1,5,1,12,1,1,1,14,8,16],
117 [1,1,1,1,3,1,40,1,1,1,14,31],
118 [1,1,1,1,0,1,4,1,10,5],
123 [1,0,1,1,0,1,4,1,1,1,13,0],
124 [1,1,1,1,1,1,33,1,1,1,14,41],
125 [1,1,1,1,1,1,51,1,1,1,14,51],
126 [1,0,1,1,1,1,1,1,10,1],
127 [1,1,1,1,3,1,28,1,1,1,14,33],
128 [1,1,1,1,1,1,17,1,1,1,14,10],
129 [1,1,1,1,0,1,11,1,1,1,13,20],
130 [1,1,1,1,2,1,16,1,1,1,14,23],
133 [1,1,1,1,3,1,10,1,13,10],
141 [1,1,1,1,3,1,9,1,13,5,16],
145 [1,1,1,1,2,1,10,1,12,19],
146 [1,1,1,1,1,1,50,1,13,14],
147 [1,1,1,1,4,1,9,1,13,12],
148 [1,1,1,1,3,1,9,1,13,8,8],
149 [1,1,1,1,1,1,11,1,13,12],
150 [1,1,0,1,2,0,0,1,1,1,1,9,4],
151 [1,1,0,1,2,0,0,1,1,1,1,9,4],
152 [1,1,0,1,3,0,0,1,1,1,1,9,8],
153 [1,1,0,1,2,0,0,1,1,1,1,9,4],
154 [1,1,0,1,2,0,0,1,1,1,1,9,4]);
# Annotation keys that the T11 scan for external database ids skips.
157 my @revkeys=('Entrez Gene Status','RefSeq status','Official Full Name','chromosome','cyto','Reference','dblink',
158 'ALIAS_SYMBOL','OntologyTerm','Index terms','Official Symbol','cM','Property');
# Open the entrezgene.dat test input using the 'entrezgene' SeqIO format.
160 ok my $eio=Bio::SeqIO->new(-file=>test_input_file('entrezgene.dat'), -format=>'entrezgene', -debug=>'on',-service_record=>'yes');
162 my ($seq,$struct,$uncapt);
# next_seq is called in list context; its results are stored as
# ($seq,$struct,$uncapt).
166 ($seq,$struct,$uncapt)=$eio->next_seq;
# Expected loop counts for this record, consumed with shift by the sections below.
168 my @lc = @{$loop_counts[$num_of_seqs]};
173 is ref($struct),'Bio::Cluster::SequenceFamily';
174 my $acc=$seq->accession_number;
177 my $org=$seq->species->binomial;
# Exactly one entry of @species must match the record's binomial.
178 is grep(/\b$org\b/,@species),1;
# A description must be present for accessions 1, 2 and 11304 and must be
# undefined for accessions 171592 and 11306.
181 ok $seq->desc if ($acc eq '1')||($acc eq '2')||($acc eq '11304');
182 ok !defined $seq->desc if ($acc eq '171592')||($acc eq '11306');
184 #Are we supposed to have this in our test?
# The accession must match at least one entry of @ids.
185 ok grep(/\b$acc\b/,@ids);
# Annotation collection queried by all of the annotation tests that follow.
187 my $ann=$seq->annotation();
190 #T3: ENTREZGENE STATUS TESTS
# Spot-check the value of the 'Entrez Gene Status' annotation for selected
# accessions.  After a check has run, `last STATUS` exits the loop labelled
# STATUS.
191 my @egstatus=$ann->get_Annotations('Entrez Gene Status');
193 foreach my $status (@egstatus) {
196 if ($acc==1) {is $status->value,'live'; last STATUS;}
197 if ($acc==2) {is $status->value,'live'; last STATUS;}
198 if ($acc==4) {is $status->value,'discontinued'; last STATUS;}
199 if ($acc==6) {is $status->value,'discontinued'; last STATUS;}
200 if ($acc==11288) {is $status->value,'secondary'; last STATUS;}
201 if ($acc==11293) {is $status->value,'secondary'; last STATUS;}
202 if ($acc==171594) {is $status->value,'live'; last STATUS;}
# Compare the loop counter with the next expected count for this record.
205 is $loop_count, shift @lc, "correct number of loops for T3";
208 #T4: REFSEQ STATUS TESTS
# Spot-check the value of the 'RefSeq status' annotation for selected
# accessions; accession 11300 is expected to carry an empty string.
209 my @refstatus=$ann->get_Annotations('RefSeq status');
210 foreach my $status (@refstatus) {
213 if ($acc==1) {is $status->value,'REVIEWED'; last STATUS;}
214 if ($acc==2) {is $status->value,'REVIEWED'; last STATUS;}
215 if ($acc==3) {is $status->value,'PROVISIONAL'; last STATUS;}
216 if ($acc==4) {is $status->value,'WITHDRAWN'; last STATUS;}
217 if ($acc==9) {is $status->value,'VALIDATED'; last STATUS;}
218 if ($acc==11300) {is $status->value,''; last STATUS;}
219 if ($acc==11306) {is $status->value,'MODEL'; last STATUS;}
# NOTE(review): 'secondary' is also the value T3 expects for this accession's
# 'Entrez Gene Status'; confirm it is the intended RefSeq status as well.
220 if ($acc==11293) {is $status->value,'secondary'; last STATUS;}
221 if ($acc==171594) {is $status->value,'Reviewed'; last STATUS;}
# Compare the loop counter with the next expected count for this record.
224 is $loop_count, shift @lc, "correct number of loops for T4";
# T5 (numbering taken from the loop-count message below): spot-check the
# value of the 'Official Full Name' annotation for selected accessions.
228 my @ofname=$ann->get_Annotations('Official Full Name');
229 foreach my $name (@ofname) {
232 if ($acc==10) {is $name->value,'N-acetyltransferase 2 (arylamine N-acetyltransferase)'; last STATUS;}
233 if ($acc==13) {is $name->value,'arylacetamide deacetylase (esterase)'; last STATUS;}
234 if ($acc==14) {is $name->value,'angio-associated, migratory cell protein'; last STATUS;}
235 if ($acc==11287) {is $name->value,'pregnancy zone protein'; last STATUS;}
236 if ($acc==11298) {is $name->value,'arylalkylamine N-acetyltransferase'; last STATUS;}
237 if ($acc==11304) {is $name->value,'ATP-binding cassette, sub-family A (ABC1), member 4'; last STATUS;}
238 if ($acc==11306) {is $name->value,'ATP-binding cassette, sub-family B (MDR/TAP), member 7'; last STATUS;}
# Compare the loop counter with the next expected count for this record.
241 is $loop_count, shift @lc, "correct number of loops for T5";
244 #T6: CHROMOSOME TESTS
# Spot-check the value of the 'chromosome' annotation for selected
# accessions; expected values are numbers (1, 17, 3) or names ('X', 'I').
245 my @chr=$ann->get_Annotations('chromosome');
246 foreach my $chr (@chr) {
249 if ($acc==5) {is $chr->value,1; last STATUS;}
250 if ($acc==6) {is $chr->value,1; last STATUS;}
251 if ($acc==7) {is $chr->value,17; last STATUS;}
252 if ($acc==11306) {is $chr->value,'X'; last STATUS;}
253 if ($acc==11304) {is $chr->value,3; last STATUS;}
254 if ($acc==171590) {is $chr->value,'I'; last STATUS;}
255 if ($acc==171592) {is $chr->value,'I'; last STATUS;}
# Compare the loop counter with the next expected count for this record.
258 is $loop_count, shift @lc, "correct number of loops for T6";
261 #T7: GENE SYMBOL ALIAS TESTS
# Every non-empty ALIAS_SYMBOL annotation must match exactly one of the
# aliases listed in %asym for the current accession.
262 my @sym=$ann->get_Annotations('ALIAS_SYMBOL');
263 foreach my $sym (@sym) {
265 my $val = $sym->display_text;
# Test definedness first: comparing an undef value with eq would emit an
# "uninitialized value" warning before the value is skipped.
266 next if (!defined($val)||($val eq ''));
267 is grep(/\b$val\b/,@{$asym{$acc}}),1;
# Compare the loop counter with the next expected count for this record.
269 is $loop_count, shift @lc, "correct number of loops for T7";
272 #T8: CYTO LOCATION TESTS
# Spot-check the value of the 'cyto' annotation for selected accessions.
273 my @map=$ann->get_Annotations('cyto');
274 foreach my $map (@map) {
277 if ($acc==10) {is $map->value,'8p22'; last STATUS;}
278 if ($acc==11) {is $map->value,'8p22'; last STATUS;}
279 if ($acc==13) {is $map->value,'3q21.3-q25.2'; last STATUS;}
280 if ($acc==11306) {is $map->value,'X C-D'; last STATUS;}
281 if ($acc==11305) {is $map->value,'2 A2-B'; last STATUS;}
282 if ($acc==11304) {is $map->value,'3 G1'; last STATUS;}
283 if ($acc==11303) {is $map->value,'4 A5-B3'; last STATUS;}
# Compare the loop counter with the next expected count for this record.
286 is $loop_count, shift @lc, "correct number of loops for T8";
289 #T9: REFERENCE NUMBER TEST
# The number of 'Reference' annotations must equal the count stored in
# %pmed for this accession.
290 my @refs=$ann->get_Annotations('Reference');
# $#refs+1 is the element count of @refs (0 when there are none).
291 my $refs=$#refs+1||0;
292 is $pmed{$acc},$refs;
# Fetched here for use by T10 (@dblinks) and T11 (@keys).
294 my @dblinks=$ann->get_Annotations('dblink');
295 my @keys=$ann->get_all_annotation_keys;
297 #T10: GENERIF AND OTHER DBLINK TESTS
# For each dblink annotation, run the checks that apply to its database
# name: presence of ids, text or urls, and fixed primary ids for some databases.
298 my @url=qw(HGMD Ensembl KEGG Homologene);#Only validate the URL
299 foreach my $dblink (@dblinks) {
# A missing database name is treated as the empty string.
301 my $dbname=$dblink->database||'';
303 if ( $dbname eq 'generif') {#Should have ID and text
304 ok $dblink->primary_id;
305 ok $dblink->comment->text;
# MIM links are checked only when their authority is 'phenotype'.
309 if (($dbname eq 'MIM')&&($dblink->authority)&&($dblink->authority eq 'phenotype')) {
310 ok $dblink->optional_id;
313 if ($dbname eq 'Evidence viewer') {
314 ok $dblink->url; #We may even validate the urls?
315 is $dblink->primary_id,2;
318 if ($dbname eq 'Model maker') {
319 ok $dblink->url; #We may even validate the urls?
320 is $dblink->primary_id,2;
323 if ($dbname eq 'AceView') {
324 ok $dblink->url; #We may even validate the urls?
325 is $dblink->primary_id,2;
# NOTE(review): this is an unanchored regex match of $dbname against each
# @url entry, not an equality test, and an empty $dbname gives an empty
# pattern -- confirm this is intended before tightening it, since the
# number of executed tests depends on it.
328 if (grep(/$dbname/,@url)) {
329 ok $dblink->url; #We may even validate the urls?
332 if ($dbname eq 'GDB') {
333 is $dblink->primary_id,'GDB:119639'; #We may even validate the urls?
336 if ($dbname eq 'UniGene') {
337 ok $dblink->url; #We may even validate the urls?
338 is $dblink->primary_id,'Hs.212838';
341 if ($dbname eq 'PharmGKB') {
342 is $dblink->primary_id,'PA24357';
345 if ($dbname eq 'MGC') {
346 ok $dblink->url; #We may even validate the urls?
347 is $dblink->primary_id,'BC040071';
# Compare the loop counter with the next expected count for this record.
353 is $loop_count, shift @lc, "correct number of loops for T10";
356 #T11: SOME EXTERNAL DATABASE IDS TESTS
# Walk every annotation key that does not match an entry of @revkeys and
# spot-check the first value for a few accession/key pairs.
357 foreach my $key (@keys) {
359 next if grep(/\b$key\b/, @revkeys);
360 my @all=$ann->get_Annotations($key);
361 #Checking xref to some databases- OMIM, Wormbase and HGNC, others later
# Counts the values visited for this key; reset for every key.
362 my $loop_count_internal = 0;
363 foreach my $pid (@all) {
364 $loop_count_internal++;
# After a check has run, `last DBID` exits the loop labelled DBID.
366 if (($acc==8)&&($key eq 'MIM')) {is $pid->value,'108985'; last DBID;}
367 if (($acc==9)&&($key eq 'HGNC')) {is $pid->value,'7645'; last DBID;}
368 if (($acc==11298)&&($key eq 'MGI')) {is $pid->value,'1328365'; last DBID;}
369 if (($acc==171593)&&($key eq 'AceView/WormGenes')) {is $pid->value,'1A502'; last DBID;}
370 if (($acc==171594)&&($key eq 'WormBase')) {is $pid->value,'Y48G1C.4'; last DBID;}
# Each of these two comparisons consumes one expected count from @lc.
373 is $loop_count_internal, shift @lc, "correct number of loops for T11a";
375 is $loop_count, shift @lc, "correct number of loops for T11";
378 #T12: REFERENCE RECORD TEST
# The medline id of every Reference annotation must match exactly one
# entry of @pubmed.
380 foreach my $ref (@refs) {
382 my $pmed=$ref->medline;
383 is grep(/\b$pmed\b/,@pubmed),1;
# Compare the loop counter with the next expected count for this record.
385 is $loop_count, shift @lc, "correct number of loops for T12";
389 #T13/14: STS Markers and Gene Ontology
# Expected values for the OntologyTerm checks: STS marker synonyms, ontology
# ids per accession (%go), reference medline ids per accession (%pmeds) and
# the accepted ontology names (@types).
390 my @syn=('MGI:707739','MPC786');
391 my @evid=qw(IEA TAS ISS);
393 $go{11305}=['5524', '16887', '5215', '8203', '6810', '16021' ,'5765'];
394 $go{11298}=['8080', '8415', '4060', '16740'];
395 $pmeds{11305}=['12466851'];
396 my @types=qw(Function Component Process);
397 if (($acc==11305)||($acc==11298)) { #Let's check just these two...
398 foreach my $ot ($ann->get_Annotations('OntologyTerm')) {
# Terms whose authority is 'STS marker' are checked for name, namespace,
# identifier and synonyms.
400 if (($ot->term->authority)&&($ot->term->authority eq 'STS marker')) {
402 is $ot->name,'AI413825';
403 is $ot->term->namespace,'UniSTS';
404 is $ot->identifier,158928;
407 is $ot->name,'D11Mit102';
408 is $ot->term->namespace,'UniSTS';
409 is $ot->identifier,126289;
410 foreach my $syn ($ot->get_synonyms) {
411 is grep(/\b$syn\b/,@syn),1;
# Ontology checks: the ontology name, the identifier and (when the term has
# a reference) the medline id must each match exactly one expected value.
416 my $evid=$ot->comment;
417 $evid=~s/evidence: //i;
418 my $type=$ot->ontology->name;
419 my @ref=$ot->term->get_references;
420 my $id=$ot->identifier;
# Declare unconditionally: `my ... if COND` has undefined behaviour and can
# leave a value from an earlier loop iteration in the variable.
421 my $thispmed=(@ref) ? $ref[0]->medline : undef;
422 is grep(/\b$type\b/,@types),1;
423 is grep(/\b$id\b/,@{$go{$acc}}),1;
424 is grep(/\b$thispmed\b/,@{$pmeds{$acc}}),1 if ($thispmed);
# Compare the loop counter with the next expected count for this record.
427 is $loop_count, shift @lc, "correct number of loops for T13/14";
431 #T15/16/17: GENOMIC LOCATION TESTS/SEQUENCE TYPES TESTS/CONSERVED DOMAINS TESTS
# GFF strings that the genomic features of accession 1 are compared against.
432 my @gffs=('SEQ entrezgene gene location 63548355 63556668 . + .',
433 'SEQ entrezgene genestructure 63548355 63556668 . + .',
434 'SEQ entrezgene gene location 31124733 31133046 . + .',
435 'SEQ entrezgene genestructure 31124733 31133046 . + .',
436 'SEQ entrezgene gene location 8163589 8172398 . + .',
437 'SEQ entrezgene genestructure 8163589 8172398 . + .');
# Members of the structure returned by next_seq alongside the sequence.
438 my @contigs=$struct->get_members;
439 my @auth=('mrna','genomic','product','mrna sequence','protein','peptide');#Known types....
440 foreach my $contig (@contigs) {
# The member's authority must equal exactly one known type, ignoring case.
442 my $stype=$contig->authority;
443 is grep(/^$stype$/i,@auth),1;
444 if ($acc==1) {#Do just 1?
445 if (($contig->authority eq 'genomic')||($contig->authority eq 'Genomic')) {
446 foreach my $sf ($contig->get_SeqFeatures) {
# The source tag is set before the GFF string is produced so that it
# appears as 'entrezgene' in the strings compared with @gffs.
447 $sf->source_tag('entrezgene');
448 my $gff=$sf->gff_string;
450 foreach my $gffstr (@gffs) {
451 if ($gffstr eq $gff) {
458 if ($contig->authority eq 'Product') {
459 is $contig->id,'NP_570602';
460 is $contig->accession_number,21071030;
461 foreach my $sf ($contig->get_SeqFeatures) {
462 foreach my $dblink ($sf->annotation->get_Annotations('dblink')) {
# Use the link's _anchor entry when it is true, otherwise its optional id.
463 my $key=$dblink->{_anchor}?$dblink->{_anchor}:$dblink->optional_id;
464 my $db=$dblink->database;
# Only links whose database matches /cdd/i, or whose feature's primary tag
# matches /conserved/i, are checked.
465 next unless (($db =~/cdd/i)||($sf->primary_tag=~ /conserved/i));
# The key has the form "<id>:<description>"; split it and trim the
# description's leading whitespace.
468 ($key,$desc)=split(/:/,$key);
470 $desc=~s/^\s+//;#THIS SHOULD GO IN entrezgene.pm!!!
471 is $desc,'IGc2; Immunoglobulin C-2 Type';
472 is $key,'smart00408';
# Unlike the earlier sections this is a lower bound (>=), not an exact match.
482 cmp_ok( $loop_count,'>=', shift @lc, "correct number of loops for T15");
# The record counter must have reached 39.
485 is $num_of_seqs, 39, 'looped through correct number of sequences';
488 #, -locuslink=>'convert');
489 #See if we can convert to locuslink
490 #T18: BACKCOMPATIBILITY TESTS
# Annotation keys fetched for each record in this section.
491 my @llsp =('OFFICIAL_GENE_NAME','CHR','MAP','OFFICIAL_SYMBOL');
# Read the same input again, this time with -locuslink=>'convert'.
492 ok my $eio_b=Bio::SeqIO->new(-file=>test_input_file('entrezgene.dat'),-format=>'entrezgene', -debug=>'on',-service_record=>'yes',-locuslink=>'convert');
494 while (my $seq=$eio_b->next_seq) {
497 my $acc=$seq->accession_number;
# The accession must match exactly one entry of @ids.
498 is grep(/\b$acc\b/,@ids),1;
499 my $ann=$seq->annotation;
# Stop at accession 4; the accession check above has already run for it.
500 last if ($acc==4);#3 is enough? and 4 does not have gene name, so....
501 foreach my $key (@llsp) {
502 my @vals=$ann->get_Annotations($key);
# The loop counter is expected to be exactly 4 here.
506 is $loop_count, 4, "correct number of loops for T18";