1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 116);
12 use_ok('Bio::Location::Simple');
13 use_ok('Bio::Coordinate::Pair');
14 use_ok('Bio::Coordinate::ExtrapolatingPair');
15 use_ok('Bio::Coordinate::GeneMapper');
21 # No gaps returned, matches extrapolated
22 # returns always a match or undef
27 # the reverse strand pair
28 my $inr = Bio::Location::Simple->new(-start=>2, -end=>5, -strand=>1);
29 my $outr = Bio::Location::Simple->new(-start=>10, -end=>13, -strand=>-1);
30 ok my $pairr = Bio::Coordinate::ExtrapolatingPair->
35 my $posr = Bio::Location::Simple->new
36 (-start => 3, -end => 4, -strand=> 1 );
37 my $resr = $pairr->map($posr);
45 my $match1 = Bio::Location::Simple->new
46 (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
48 my $match2 = Bio::Location::Simple->new
49 (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
51 ok my $pair = Bio::Coordinate::ExtrapolatingPair->
58 is $pair->strand(), 1; # = in->strand * out->strand
59 is $pair->in->seq_id(), 'propeptide';
60 is $pair->strict(), 1;
62 my ($count, $pos, $pos2, $res, $match, $res2);
65 $pos = Bio::Location::Simple->new
66 (-start => 25, -end => 25, -strand=> -1 );
67 $res = $pair->map($pos);
69 isa_ok $res, 'Bio::Location::Simple';
73 is $res->seq_id, 'peptide';
76 # match outside = undef
77 $pos = Bio::Location::Simple->new (-start => 5, -end => 5 );
78 $res = $pair->map($pos);
83 # partial match = match
85 $pos2 = Bio::Location::Simple->new
86 (-start => 20, -end => 22, -strand=> -1 );
88 ok $res = $pair->map($pos2);
92 is $res->seq_id, 'peptide';
97 # partial match2 = match & gap
99 $pos2 = Bio::Location::Simple->new (-start => 40, -end => 41, -strand=> 1 );
100 ok $res = $pair->map($pos2);
107 $pos2 = Bio::Location::Simple->new (-start => 19, -end => 41, -strand=> 1 );
108 ok $res = $pair->map($pos2);
113 # testing the changing the strand
117 $match1 = Bio::Location::Simple->new
118 (-seq_id => 'chr', -start => 21, -end => 40, -strand=>1 );
120 $match2 = Bio::Location::Simple->new
121 (-seq_id => 'gene', -start => 1, -end => 20, -strand=>-1 );
123 $pair = Bio::Coordinate::ExtrapolatingPair->
124 #my $pair = Bio::Coordinate::Pair->
130 $pos = Bio::Location::Simple->new
131 (-start => 38, -end => 40, -strand=> 1 );
132 $res = $pair->map($pos);
137 $pos = Bio::Location::Simple->new
138 (-start => 1, -end => 3, -strand=> 1 );
139 $res = $pair->map($pos);
151 ok my $m = Bio::Coordinate::GeneMapper->new(-in => 'propeptide',
155 is $m->peptide_offset(5), 5;
159 $pos = Bio::Location::Simple->new
160 (-start => 25, -end => 25, -strand=> 1 );
161 $res = $m->map($pos);
166 is $res->seq_id, 'peptide';
174 $pos = Bio::Location::Simple->new
175 (-start => 4, -end => 5, -strand=> 1 );
176 $res = $m->map($pos);
180 is $m->nozero('in&out'), 'in&out';
181 $res = $m->map($pos);
189 $pos = Bio::Location::Simple->new
190 (-start => 5, -end => 5, -strand=> 1 );
191 $res = $m->map($pos);
195 is $m->in('cds'), 'cds';
196 is $m->out('propeptide'), 'propeptide';
198 $res = $m->map($pos);
200 ok $res = $m->_translate($pos);
202 ok $res = $m->_reverse_translate($pos);
206 $pos = Bio::Location::Simple->new
207 (-start => 26, -end => 26, -strand=> 1 );
209 $res = $m->map($pos);
217 $pos = Bio::Location::Simple->new
218 (-start => 1, -end => 3, -strand=> 1 );
219 $res = $m->_frame($pos);
224 # Collection representing exons
228 # gene 1 5 11 15 21 25
230 #-----|-----------------------|---|--
231 # chr 1 5 9 15 19 25 29
235 my $e1 = Bio::Location::Simple->new
236 (-seq_id => 'gene', -start => 5, -end => 9, -strand=>1 );
237 my $e2 = Bio::Location::Simple->new
238 (-seq_id => 'gene', -start => 15, -end => 19, -strand=>1 );
239 my $e3 = Bio::Location::Simple->new
240 (-seq_id => 'gene', -start => 25, -end => 29, -strand=>1 );
241 my @cexons = ($e1, $e2, $e3);
243 $m= Bio::Coordinate::GeneMapper->new();
247 my $off = $m->cds(5);
248 is $off->start, 5; # start of the coding region
249 is $m->exons(@cexons), 3;
252 $pos = Bio::Location::Simple->new
253 (-start => 6, -end => 7, -strand=> 1 );
254 $res = $m->map($pos);
259 $m->out('negative_intron');
260 $pos = Bio::Location::Simple->new
261 (-start => 12, -end => 14, -strand=> 1 );
262 $res = $m->map($pos);
265 is $res->seq_id, 'intron1';
270 $pos = Bio::Location::Simple->new
271 (-start => 5, -end => 9, -strand=> 1 );
272 $res = $m->map($pos);
276 $pos = Bio::Location::Simple->new
277 (-start => 15, -end => 25, -strand=> 1 );
278 $res = $m->map($pos);
282 $pos = Bio::Location::Simple->new
283 (-start => 5, -end => 19, -strand=> 1 );
284 $res = $m->map($pos);
290 # chr to cds ; ranges into one
292 my $exons = Bio::Location::Split->new(-seq_id => 'gene');
293 $exons->add_sub_Location($e1);
294 $exons->add_sub_Location($e2);
295 $exons->add_sub_Location($e3);
297 $res = $m->map($exons);
298 isa_ok $res,'Bio::Location::Simple';
303 # cds to chr; single range into two
308 $pos = Bio::Location::Simple->new
309 (-start => 4, -end => 7, -strand=> 1 );
310 $res = $m->map($pos);
316 # Collection representing exons
318 # cds -11 -7 -6 -2 -1 3 :27
319 # cds -6 -2 -1 1 3 4 8 :17
321 # gene -21 -17 -11 -7 -1 1 3 :27
322 # gene -11 -7 -1 1 3 9 13 :17
324 #-----|-----------------------|---|--
325 # chr 1 5 9 15 19 25 29
328 $m= Bio::Coordinate::GeneMapper->new();
333 is $off->start, 17; # start of the coding region
334 is $m->exons(@cexons), 3;
336 # testing parameter handling in the constructor
337 ok $m = Bio::Coordinate::GeneMapper->new(-in => 'gene',
348 # Mapping SNPs into human serum protein MSE55 and
349 # human galecting LGALS2 from Ensembl:
352 #Ensembl Gene ID Exon Start (Chr bp) Exon End (Chr bp) Exon Coding Start (Chr bp)
353 # Exon Coding End (Chr bp) Strand
355 my @gene1_dump = split ( /\n/, qq {
356 ENSG00000128283 34571058 34571126 1
357 ENSG00000128283 34576610 34577350 34576888 34577350 1
358 ENSG00000128283 34578646 34579858 34578646 34579355 1
362 my @gene2_dump = split ( /\n/, qq {
363 ENSG00000100079 34590438 34590464 -1
364 ENSG00000100079 34582387 34582469 34582387 34582469 -1
365 ENSG00000100079 34581114 34581273 34581114 34581273 -1
366 ENSG00000100079 34580784 34580950 34580804 34580950 -1
367 }); # exon start should be less than end or is this intentional?
369 #Chromosome Name Location (bp) Strand Reference ID
370 my @snp_dump = split ( /\n/, qq {
371 22 34572694 1 2235335
372 22 34572799 1 2235336
373 22 34572843 1 2235337
374 22 34574896 1 2076087
375 22 34575256 1 2076088
376 22 34578830 1 2281098
377 22 34579111 1 2281099
378 22 34580411 1 2235338
379 22 34580591 1 2281097
380 22 34580845 1 2235339
381 22 34581963 1 2281100
385 22 34588207 1 2284055
386 22 34591507 1 1969639
391 my ($cdsr, @exons) = read_gene_data(@gene1_dump);
393 ok my $g1 = Bio::Coordinate::GeneMapper->new(-in=>'chr', -out=>'gene');
396 #$pos = Bio::Location::Simple->new
397 # (-start => 34576888, -end => 34579355, -strand=> 1 );
398 $res = $g1->map($cdsr);
405 $res = $g1->map($res);
409 #map_snps($g1, @snp_dump);
412 #gene 2 in reverse strand
413 ($cdsr, @exons) = read_gene_data(@gene2_dump);
414 ok my $g2 = Bio::Coordinate::GeneMapper->new(-in=>'chr', -out=>'gene');
417 $pos = Bio::Location::Simple->new
418 (-start => $cdsr->end-2, -end => $cdsr->end, -strand=> 1 );
419 $res = $g2->map($pos);
427 #map_snps($g2, @snp_dump);
430 $match1 = Bio::Location::Simple->new
431 (-seq_id => 'a', -start => 5, -end => 17, -strand=>1 );
432 $match2 = Bio::Location::Simple->new
433 (-seq_id => 'b', -start => 1, -end => 13, -strand=>-1 );
434 ok $pair = Bio::Coordinate::Pair->new(-in => $match1,
442 ok my $split = Bio::Location::Split->new();
443 ok $split->add_sub_Location(Bio::Location::Simple->new(-start=>6,
446 $split->add_sub_Location(Bio::Location::Simple->new(-start=>15,
450 $res=$pair->map($split);
451 ok my @sublocs = $res->each_Location(1);
454 #print Dumper \@sublocs;
455 is $sublocs[0]->start, 2;
456 is $sublocs[0]->end, 3;
457 is $sublocs[1]->start, 10;
458 is $sublocs[1]->end, 12;
460 # testing cds -> gene/chr which generates a split location from a simple one
461 # exons in reverse strand!
468 #-----|-------------------
473 $e1 = Bio::Location::Simple->new
474 (-seq_id => 'gene', -start => 5, -end => 9, -strand=>-1 );
475 $e2 = Bio::Location::Simple->new
476 (-seq_id => 'gene', -start => 15, -end => 19, -strand=>-1 );
477 @cexons = ($e1, $e2);
478 my $cds= Bio::Location::Simple->new
479 (-seq_id => 'gene', -start => 5, -end => 17, -strand=>-1 );
481 $m = Bio::Coordinate::GeneMapper->new(-in=>'cds', -out=>'chr');
483 $m->cds($cds); # this has to be set first!?
484 is $m->exons(@cexons), 2;
487 my $cds_f= Bio::Location::Simple->new
488 (-start => 2, -end => 7, );
489 $res = $m->map($cds_f);
491 ok @sublocs = $res->each_Location(1);
494 is $sublocs[0]->start, 6;
495 is $sublocs[0]->end, 9;
496 is $sublocs[1]->start, 15;
497 is $sublocs[1]->end, 16;
500 # test inex, exon & negative_intron
505 $pos = Bio::Location::Simple->new
506 (-seq_id => 'gene', -start => 2, -end => 10, -strand=> 1 );
508 $res = $m->map($pos);
509 is $res->each_Location, 3;
513 $res = $m->map($pos);
518 $m->out('negative_intron');
519 $res = $m->map($pos);
524 is $m->_mapper_code2string('1-2'), 'chr-gene';
525 is $m->_mapper_string2code('chr-gene'), '1-2';
529 # strict mapping mode
530 # extrapolating pair code into Bio::Coordinate::Pair ?
538 my ($self,@gene_dump) = @_;
539 my ($cds_start, $cds_end, $strand, @exons);
542 my ($first, $first_line);
543 for my $line ( @gene_dump ) {
545 my ($geneid, $exon_start, $exon_end, $exon_cstart,
546 $exon_cend, $exon_strand) = split /\t/, $line;
548 $strand = $exon_strand if $exon_strand;
549 #print join (' ', $geneid, $exon_start, $exon_strand), "\n";
551 # CDS location in chromosome coordinates
552 $cds_start = $exon_cstart if !$cds_start and $exon_cstart;
553 $cds_end = $exon_cend if $exon_cend;
556 if ($exon_start > $exon_end) {
557 ($exon_start, $exon_end) = ($exon_end, $exon_start);
560 my $exon = Bio::Location::Simple->new
561 (-seq_id => 'gene', -start => $exon_start,
562 -end => $exon_end, -strand=>$strand, -verbose=>2);
566 if ($cds_start > $cds_end) {
567 ($cds_start, $cds_end) = ($cds_end, $cds_start);
570 my $cdsr = Bio::Location::Simple->new (-start => $cds_start,
574 return ($cdsr, @exons);
579 my ($mapper, @snps) =@_;
582 foreach my $line (@snps) {
585 my ($chr, $start, $strand, $id) = split /\t/, $line;
586 my $loc = Bio::Location::Simple->new
588 -end => $start, -strand=>$strand );
590 my $res = $mapper->map($loc);
592 $cds_start = $res->start if defined $res;#defined $res->start;
593 print $id, "\t", $cds_start, "\n";
597 $mapper->out('propeptide');
598 my $frame_obj = $mapper->_frame($res);
599 my $res = $mapper->map($loc);
601 $cds_start = $res->start if defined $res;#defined $res->start;
602 print "\t\t", $cds_start, " (", $frame_obj->start, ")\n";