1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 175);
12 use_ok('Bio::Location::Simple');
13 use_ok('Bio::Coordinate::Pair');
14 use_ok('Bio::Coordinate::Result::Match');
15 use_ok('Bio::Coordinate::Result::Gap');
16 use_ok('Bio::Coordinate::Chain');
17 use_ok('Bio::Coordinate::Collection');
22 ok $c = Bio::Coordinate::Result::Match-> new;
23 ok $c = Bio::Coordinate::Result::Gap-> new;
26 my $match1 = Bio::Location::Simple->new
27 (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
29 my $match2 = Bio::Location::Simple->new
30 (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
32 ok my $pair = Bio::Coordinate::Pair->new(-in => $match1,
34 -negative => 0, # false, default
38 is $pair->strand(), 1; # = in->strand * out->strand
39 is $pair->in->seq_id(), 'propeptide';
42 my ($count, $pos, $pos2, $res, $match, $res2);
48 $pos = Bio::Location::Simple->new
49 (-start => 25, -end => 25, -strand=> -1 );
51 # results are in Bio::Coordinate::Result
52 # they can be Matches and Gaps; are Bio::LocationIs
53 ok $res = $pair->map($pos);
54 isa_ok $res, 'Bio::Coordinate::Result';
55 isa_ok $res, 'Bio::Location::SplitLocationI';
56 is $res->each_match, 1;
58 is $res->each_Location, 1;
60 isa_ok $res->match, 'Bio::LocationI';
61 isa_ok $res->match, 'Bio::Coordinate::Result::Match';
62 is $res->match->start, 5;
63 is $res->match->end, 5;
64 is $res->match->strand, -1;
65 is $res->match->seq_id, 'peptide';
69 #is $res->seq_id, 'peptide';
74 $res2 = $pair->map($match);
75 is $res2->match->start, $pos->start;
76 is $res2->match->end, $pos->end;
77 is $res2->match->strand, $pos->strand;
78 is $res2->match->seq_id, $pair->out->seq_id;
84 $pos = Bio::Location::Simple->new (-start => 5, -end => 5 );
86 ok $res = $pair->map($pos);
88 is $res->each_Location, 1;
91 isa_ok $res->gap, 'Bio::Coordinate::Result::Gap';
92 isa_ok $res->gap, 'Bio::LocationI';
93 is $res->gap->strand, 1;
94 is $res->gap->start, 5;
95 is $res->gap->length, $pos->length;
96 is $res->gap->seq_id, 'propeptide';
100 # partial match = gap & match
102 $pos2 = Bio::Location::Simple->new
103 (-start => 20, -end => 22, -strand=> -1 );
105 ok $res = $pair->map($pos2);
107 is $res->each_match, 1;
108 is $res->each_gap, 1;
109 is $res->each_Location, 2;
110 is $res->match->length + $res->gap->length, $pos2->length;
112 is $res->match->start, 1;
113 is $res->match->end, 2;
114 is $res->match->seq_id, 'peptide';
115 is $res->match->strand, -1;
116 is $res->gap->start, 20;
117 is $res->gap->end, 20;
118 is $res->gap->seq_id, 'propeptide';
119 is $res->gap->strand, -1;
122 # partial match = match & gap
124 $pos2 = Bio::Location::Simple->new (-start => 40, -end => 41, -strand=> 1 );
125 ok $res = $pair->map($pos2);
126 is $res->match->length + $res->gap->length, $pos2->length;
131 $pos2 = Bio::Location::Simple->new (-start => 19, -end => 41, -strand=> 1 );
132 ok $res = $pair->map($pos2);
133 $count = 0; map {$count += $_->length} $res->each_Location;
134 is $count, $pos2->length;
143 $pos = Bio::Location::Simple->new (-start => 5, -end => 6, -location_type=>'^');
144 $res = $pair->map($pos);
145 is $res->each_gap, 1;
146 is $res->each_Location, 1;
149 $pos = Bio::Location::Simple->new (-start => 21, -end => 22, -location_type=>'^');
150 $res = $pair->map($pos);
151 is $res->each_match, 1;
152 is $res->each_Location, 1;
155 $pos = Bio::Location::Simple->new (-start => 20, -end => 21, -location_type=>'^');
156 $res = $pair->map($pos);
157 is $res->each_gap, 1;
158 is $res->each_Location, 1;
161 $pos = Bio::Location::Simple->new (-start => 40, -end => 41, -location_type=>'^');
162 $res = $pair->map($pos);
163 is $res->each_gap, 1;
164 is $res->each_Location, 1;
176 $match1 = Bio::Location::Simple->new
177 (-seq_id => 'from', -start => 2, -end => 11, -strand=>1 );
179 $match2 = Bio::Location::Simple->new
180 (-seq_id => 'to', -start => 2, -end => 11, -strand=>-1 );
181 $pair = Bio::Coordinate::Pair->new(-in => $match1,
189 is $pair->strand(), -1;
190 $pos = Bio::Location::Simple->new
191 (-seq_id => 'from', -start => 7, -end => 9, -strand=>1 );
192 $res = $pair->map($pos);
193 is $res->match->start, 4;
194 is $res->match->end, 6;
195 is $res->match->strand, -1;
197 $pos = Bio::Location::Simple->new
198 (-seq_id => 'from', -start => 3, -end => 10, -strand=>-1 );
199 $res = $pair->map($pos);
200 is $res->match->start, 3;
201 is $res->match->end, 10;
202 is $res->match->strand, 1;
205 # match outside = Gap
207 $pos = Bio::Location::Simple->new
208 (-seq_id => 'from', -start => 1, -end => 1, -strand=>1 );
209 $res = $pair->map($pos);
210 is $res->gap->start, 1;
211 is $res->gap->end, 1;
212 is $res->gap->strand, 1;
213 $pos = Bio::Location::Simple->new
214 (-seq_id => 'from', -start => 12, -end => 12, -strand=>-1 );
215 $res = $pair->map($pos);
216 is $res->gap->start, 12;
217 is $res->gap->end, 12;
218 is $res->gap->strand, -1;
222 # partial match1 = gap & match
224 $pos = Bio::Location::Simple->new
225 (-seq_id => 'from', -start => 1, -end => 7, -strand=>-1 );
226 $res = $pair->map($pos);
227 is $res->gap->start, 1;
228 is $res->gap->end, 1;
229 is $res->gap->strand, -1;
230 is $res->match->start, 6;
231 is $res->match->end, 11;
232 is $res->match->strand, 1;
235 # partial match2 = match & gap
238 $pos = Bio::Location::Simple->new
239 (-seq_id => 'from', -start => 9, -end => 12, -strand=>-1 );
240 $res = $pair->map($pos);
241 is $res->match->start, 2;
242 is $res->match->end, 4;
243 is $res->match->strand, 1;
244 is $res->gap->start, 12;
245 is $res->gap->end, 12;
246 is $res->gap->strand, -1;
252 $pos = Bio::Location::Simple->new
253 (-seq_id => 'from', -start => 1, -end => 12, -strand=>-1 );
254 $res = $pair->map($pos);
255 is $res->match->start, 2;
256 is $res->match->end, 11;
257 is $res->match->strand, 1;
259 my ($gap1, $gap2) = $res->each_gap;
262 is $gap1->strand, -1;
265 is $gap2->strand, -1;
270 # chain (two) mappers together
274 $match1 = Bio::Location::Simple->new
275 (-seq_id => 'propeptide', -start => 5, -end => 40, -strand=>1 );
277 $match2 = Bio::Location::Simple->new
278 (-seq_id => 'peptide', -start => 1, -end => 36, -strand=>1 );
280 ok $pair = Bio::Coordinate::Pair->new(-in => $match1,
285 ok my $chain = Bio::Coordinate::Chain->new;
286 ok $chain->add_mapper($pair);
287 $chain->add_mapper($pair);
290 $pos = Bio::Location::Simple->new
291 (-seq_id => 'from', -start => 6, -end => 21, -strand=> 1 );
295 $match = $chain->map($pos);
296 isa_ok $match, 'Bio::Coordinate::Result::Match';
299 is $match->strand, 1;
308 #-----|-----------------------
313 $match1 = Bio::Location::Simple->new
314 (-seq_id => 'gene', -start => 5, -end => 9, -strand=>1 );
316 $match2 = Bio::Location::Simple->new
317 (-seq_id => 'exon1', -start => 1, -end => 5, -strand=>1 );
319 ok my $pair1 = Bio::Coordinate::Pair->new(-in => $match1,
323 my $match3 = Bio::Location::Simple->new
324 (-seq_id => 'gene', -start => 15, -end => 19, -strand=>1 );
326 my $match4 = Bio::Location::Simple->new
327 (-seq_id => 'exon2', -start => 6, -end => 10, -strand=>1 );
329 ok my $pair2 = Bio::Coordinate::Pair->new(-in => $match3,
333 ok my $transcribe = Bio::Coordinate::Collection->new;
334 ok $transcribe->add_mapper($pair1);
335 ok $transcribe->add_mapper($pair2);
339 $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
340 ok $res = $transcribe->map($pos);
341 is $res->match->start, 1;
342 is $res->match->end, 5;
343 is $res->match->seq_id, 'exon1';
346 $pos = Bio::Location::Simple->new (-start => 2, -end => 9 );
347 ok $res = $transcribe->map($pos);
348 is $res->each_gap, 1;
349 is $res->each_match, 1;
350 is $res->match->start, 1;
351 is $res->match->end, 5;
354 $pos = Bio::Location::Simple->new (-start => 5, -end => 12 );
355 ok $res = $transcribe->map($pos);
356 is $res->each_gap, 1;
357 is $res->each_match, 1;
358 is $res->match->start, 1;
359 is $res->match->end, 5;
361 # match more than two
362 $pos = Bio::Location::Simple->new (-start => 5, -end => 19 );
363 ok $res = $transcribe->map($pos);
364 is $res->each_gap, 2;
365 is $res->each_match, 2;
373 #-----|-----------------------|---|--
378 # create the third pair
380 my $match5 = Bio::Location::Simple->new
381 (-seq_id => 'gene', -start => 25, -end => 29, -strand=>1 );
383 my $match6 = Bio::Location::Simple->new
384 (-seq_id => 'exon3', -start => 11, -end => 15, -strand=>1 );
386 my $pair3 = Bio::Coordinate::Pair->new(-in => $match5,
390 # create a new collection in wrong order
391 $transcribe = Bio::Coordinate::Collection->new;
392 $transcribe->add_mapper($pair3);
393 $transcribe->add_mapper($pair1);
394 $transcribe->add_mapper($pair2);
395 ok $transcribe->sort;
397 map {push @res, $_->in->start } $transcribe->each_mapper;
398 ok compare_arrays ([5, 15, 25], \@res);
402 # Test using genomic data
405 my $mapper = Bio::Coordinate::Collection->new;
407 load_data($mapper, undef );
409 # transform a segment entirely within the first rawcontig
410 #test_transform ($mapper,
411 # [627012, 2, 5, -1, "rawcontig"],
412 # ["chr1", 2, 5, -1]);
413 $pos = Bio::Location::Simple->new (-start => 2, -end => 5, -strand => -1);
414 $res = $mapper->map($pos);
415 is $res->match->start, 2;
416 is $res->match->end, 5;
417 is $res->match->strand, -1;
418 is $res->match->seq_id, '627012';
422 [314696, 31917, 31937, -1],
423 [341, 126, 59773, -1],
424 [315843, 5332, 5963, +1]
426 $pos = Bio::Location::Simple->new (-start => 383700, -end => 444000, -strand => 1);
427 $res = $mapper->map($pos);
428 @res = $res->each_match;
429 compare (shift @res, shift @testres);
430 compare (shift @res, shift @testres);
431 compare (shift @res, shift @testres);
435 [627011, 7447, 7507, +1],
436 ["chr1", 273762, 273781, 1]
438 $pos = Bio::Location::Simple->new (-start => 273701, -end => 273781, -strand => 1);
439 $res = $mapper->map($pos);
440 is $res->each_match, 1;
441 is $res->each_gap, 1;
442 @res = $res->each_Location;
443 compare (shift @res, shift @testres);
444 compare (shift @res, shift @testres);
447 $pos = Bio::Location::Simple->new
448 (-start => 2, -end => 5, -strand => -1, -seq_id => '627012');
449 $res = $mapper->map($pos);
450 is $res->match->start, 2;
451 is $res->match->end, 5;
452 is $res->match->strand, -1;
453 is $res->match->seq_id, 'chr1';
456 # tests for split locations
459 # testing a simple pair
460 $match1 = Bio::Location::Simple->new
461 (-seq_id => 'a', -start => 5, -end => 17, -strand=>1 );
462 $match2 = Bio::Location::Simple->new
463 (-seq_id => 'b', -start => 1, -end => 13, -strand=>-1 );
465 $pair = Bio::Coordinate::Pair->new(-in => $match1,
471 ok my $split = Bio::Location::Split->new();
472 ok $split->add_sub_Location(Bio::Location::Simple->new(-start=>6,
475 $split->add_sub_Location(Bio::Location::Simple->new(-start=>15,
479 $res=$pair->map($split);
480 ok my @sublocs = $res->each_Location(1);
483 #print Dumper \@sublocs;
484 is $sublocs[0]->start, 2;
485 is $sublocs[0]->end, 3;
486 is $sublocs[1]->start, 10;
487 is $sublocs[1]->end, 12;
495 use Bio::Coordinate::Utils;
496 use Bio::LocatableSeq;
497 use Bio::SimpleAlign;
500 #y $out = IO::String->new($string);
505 my $s1 = Bio::LocatableSeq->new(-id => 'AAA',
506 -seq => '--wtatgtng',
511 my $s2 = Bio::LocatableSeq->new(-id => 'BBB',
512 -seq => '-aaaat-tt-',
517 $a = Bio::SimpleAlign->new();
522 ok my $uti = Bio::Coordinate::Utils->new;
523 $mapper = $uti->from_align($a);
524 #print Dumper $mapper;
525 is $mapper->return_match, 1;
526 is $mapper->return_match(1), 1;
529 $pos = Bio::Location::Simple->new
530 (-start => 4, -end => 8, -strand => 1);
531 $res = $mapper->map($pos);
536 # subroutines only after this
540 my ($first, $second) = @_;
542 return 0 unless @$first == @$second;
543 for (my $i = 0; $i < @$first; $i++) {
544 return 0 if $first->[$i] ne $second->[$i];
551 my ($match, $test) = @_;
552 is $match->seq_id eq $test->[0], 1,
553 "Match: |". $match->seq_id. "| Test: ". $test->[0]. "|";
554 is $match->start, $test->[1];
555 is $match->end, $test->[2];
556 is $match->strand, $test->[3];
561 my ($map, $reverse) = @_;
563 #chr_name raw_id chr_start chr_end raw_start raw_end raw_ori
564 my @sgp_dump = split ( /\n/, qq {
565 chr1 627012 1 31276 1 31276 1
566 chr1 627010 31377 42949 72250 83822 -1
567 chr1 2768 42950 180950 251 138251 1
568 chr1 10423 180951 266154 1 85204 -1
569 chr1 627011 266255 273761 1 7507 1
570 chr1 314698 273862 283122 1 9261 -1
571 chr1 627009 283223 331394 251 48422 -1
572 chr1 314695 331395 352162 1 20768 -1
573 chr1 314697 352263 359444 1 7182 -1
574 chr1 314696 359545 383720 31917 56092 -1
575 chr1 341 383721 443368 126 59773 -1
576 chr1 315843 443369 444727 5332 6690 1
577 chr1 315844 444828 453463 1 8636 -1
578 chr1 315834 453564 456692 1 3129 1
579 chr1 315831 456793 458919 1 2127 1
580 chr1 315827 459020 468965 251 10196 -1
581 chr1 544782 468966 469955 1 990 -1
582 chr1 315837 470056 473446 186 3576 -1
583 chr1 544807 473447 474456 1 1010 -1
584 chr1 315832 474557 477289 1 2733 1
585 chr1 544806 477390 477601 1086 1297 -1
586 chr1 315840 477602 482655 21 5074 1
587 chr1 544802 482656 483460 1 805 -1
588 chr1 544811 483561 484162 6599 7200 -1
589 chr1 315829 484163 498439 15 14291 -1
590 chr1 544813 498440 500980 1 2541 -1
591 chr1 544773 501081 502190 1217 2326 -1
592 chr1 315828 502191 513296 72 11177 1
593 chr1 544815 513297 517276 2179 6158 1
594 chr1 315836 517277 517662 2958 3343 1
595 chr1 544805 517663 520643 299 3279 1
596 chr1 315835 520744 521682 2462 3400 -1
597 chr1 544784 521683 526369 54 4740 1
598 chr1 544796 526470 527698 1 1229 1
599 chr1 315833 527799 528303 2530 3034 -1
600 chr1 544803 528304 531476 1 3173 -1
601 chr1 544821 531577 532691 1 1115 1
602 chr1 544810 532792 533843 1 1052 1
603 chr1 544800 533944 535249 1 1306 1
604 chr1 544786 535350 536652 1 1303 1
605 chr1 544814 536753 538358 1 1606 1
606 chr1 544812 538459 540004 1 1546 1
607 chr1 544818 540105 541505 1 1401 1
608 chr1 544816 541606 542693 1 1088 1
609 chr1 544778 542794 544023 1 1230 1
610 chr1 544779 544124 545709 1 1586 1
611 chr1 544804 545810 547660 1 1851 1
612 chr1 544774 547761 550105 1 2345 1
613 chr1 544817 550206 552105 1 1900 1
614 chr1 544781 552206 553640 1 1435 1
615 chr1 315830 553741 555769 1 2029 -1
616 chr1 544819 555870 558904 1 3035 -1
617 chr1 544777 559005 560670 1 1666 1
618 chr1 544795 560771 563092 1 2322 1
619 chr1 544809 563193 565523 1 2331 1
620 chr1 544808 565624 568113 1 2490 1
621 chr1 544798 568214 570324 1 2111 1
622 chr1 544783 570425 574640 1 4216 1
623 chr1 544824 574741 578101 1 3361 1
624 chr1 544775 578202 580180 1 1979 -1
625 chr1 544825 580281 581858 1 1578 -1
626 chr1 544772 581959 585312 1 3354 1
627 chr1 544793 585413 588740 1 3328 1
628 chr1 544785 588841 591656 1 2816 -1
629 chr1 544791 591757 594687 1 2931 1
630 chr1 544820 594788 597671 1 2884 1
631 chr1 544790 597772 601587 1 3816 1
632 chr1 544794 601688 603324 1 1637 -1
633 chr1 544823 603425 607433 1 4009 1
634 chr1 544789 607534 610856 1 3323 1
635 chr1 544799 610957 614618 1 3662 1
636 chr1 544776 614719 618674 1 3956 -1
637 chr1 544797 618775 624522 1 5748 -1
638 chr1 544787 624623 629720 1 5098 -1
639 chr1 544792 629821 637065 1 7245 1
640 chr1 622020 837066 851064 1 13999 -1
641 chr1 622021 851165 854101 1 2937 -1
642 chr1 622016 854202 856489 1 2288 -1
643 chr1 625275 856590 888524 420 32354 -1
644 chr1 622015 888525 891483 1 2959 -1
645 chr1 622024 891584 896208 8871 13495 -1
646 chr1 625537 896209 952170 1 55962 -1
647 chr1 625538 952271 1051812 251 99792 -1
648 chr1 625277 1051813 1055193 1 3381 -1
649 chr1 625266 1055294 1062471 1 7178 -1
650 chr1 598266 1062572 1086504 11 23943 -1
651 chr1 625271 1086505 1096571 3943 14009 1
652 chr1 625265 1096572 1100161 2436 6025 -1
653 chr1 173125 1100162 1106067 3329 9234 -1
654 chr1 598265 1106068 1112101 286 6319 1
655 chr1 625360 1112102 1172572 251 60721 1
656 chr1 173111 1172573 1172716 1 144 -1
657 chr1 173103 1172817 1173945 1 1129 1
658 chr1 170531 1174046 1174188 8791 8933 -1
659 chr1 625363 1174189 1183590 67 9468 1
660 chr1 173120 1183591 1183929 153 491 -1
661 chr1 170509 1183930 1184112 864 1046 1
662 chr1 173119 1184213 1189703 1 5491 -1
663 chr1 625357 1189804 1213915 1 24112 1
664 chr1 625359 1214016 1216330 1 2315 1
666 # test the auto-sorting feature
667 # @sgp_dump = reverse (@sgp_dump) if defined $reverse;
670 for my $line ( @sgp_dump ) {
671 if( $first ) { $first = 0; next; }
672 my ( $chr_name, $contig_id, $chr_start, $chr_end,
673 $contig_start, $contig_end, $contig_strand ) =
674 split ( /\t/, $line );
676 my $match1 = Bio::Location::Simple->new
677 (-seq_id => $chr_name, -start => $chr_start,
678 -end => $chr_end, -strand=>1 );
679 my $match2 = Bio::Location::Simple->new
680 (-seq_id => $contig_id, -start => $contig_start,
681 -end => $contig_end, -strand=>$contig_strand );
683 my $pair = Bio::Coordinate::Pair->new(-in => $match1,
686 $map->add_mapper($pair);