1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 325);
12 use_ok('Bio::Factory::FTLocationFactory');
15 my $simple_impl = "Bio::Location::Simple";
16 my $fuzzy_impl = "Bio::Location::Fuzzy";
17 my $split_impl = "Bio::Location::Split";
19 # Holds strings and results. The latter is an array of expected class name,
20 # min/max start position and position type, min/max end position and position
21 # type, location type, the number of locations, and the strand.
24 # note: the following are directly taken from
25 # http://www.insdc.org/documents/feature_table.html#3.4.3
27 => [$simple_impl, 467, 467, "EXACT",
31 => [$simple_impl, 340, 340, "EXACT",
35 => [$fuzzy_impl, undef, 345, "BEFORE",
39 => [$fuzzy_impl, undef, 1, "BEFORE",
43 => [$fuzzy_impl, 1, 1, "EXACT",
47 => [$fuzzy_impl, 102, 102, "EXACT",
51 => [$fuzzy_impl, 23, 45, "WITHIN",
54 "(122.133)..(204.221)"
55 => [$fuzzy_impl, 122, 133, "WITHIN",
59 => [$simple_impl, 123, 123, "EXACT",
63 => [$fuzzy_impl, 145, 145, "EXACT",
66 "join(12..78,134..202)"
67 => [$split_impl, 12, 12, "EXACT",
70 "complement(join(2691..4571,4918..5163))"
71 => [$split_impl, 2691, 2691, "EXACT",
74 # Partial frameshifted gene at the end of a contig
75 "complement(join(94468..94578,94578..>94889))"
76 => [$split_impl, 94468, 94468, "EXACT",
77 94889, undef, "AFTER",
79 "complement(34..(122.126))"
80 => [$fuzzy_impl, 34, 34, "EXACT",
84 => [$simple_impl, 100, 100, "EXACT",
87 "join(1..100,J00194.1:100..202)"
88 => [$split_impl, 1, 1, "EXACT",
92 # this variant is not really allowed by the FT definition
93 # document but we want to be able to cope with it
95 => [$simple_impl, 100, 100, "EXACT",
98 "((122.133)..(204.221))"
99 => [$fuzzy_impl, 122, 133, "WITHIN",
102 "join(AY016290.1:108..185,AY016291.1:1546..1599)"
103 => [$split_impl, 108, 108, "EXACT",
107 # UNCERTAIN locations and positions (Swissprot)
109 => [$fuzzy_impl, 2465, 2465, "UNCERTAIN",
113 => [$fuzzy_impl, 22, 22, "EXACT",
117 => [$fuzzy_impl, 22, 22, "UNCERTAIN",
121 => [$fuzzy_impl, undef, undef, "UNCERTAIN",
125 => [$fuzzy_impl, undef, 1, "BEFORE",
126 undef, undef, "UNCERTAIN",
129 => [$fuzzy_impl, undef, undef, "UNCERTAIN",
133 => [$fuzzy_impl, 1, 1, "EXACT",
134 undef, undef, "UNCERTAIN",
137 => [$fuzzy_impl, undef, undef, "UNCERTAIN",
138 undef, undef, "UNCERTAIN",
141 => [$fuzzy_impl, 1, 1, "UNCERTAIN",
146 my $locfac = Bio::Factory::FTLocationFactory->new();
147 isa_ok($locfac,'Bio::Factory::LocationFactoryI');
149 # sorting is to keep the order constant from one run to the next
150 foreach my $locstr (keys %testcases) {
151 my $loc = $locfac->from_string($locstr);
152 if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
153 $loc->seq_id("AY016295.1");
155 if($locstr eq "join(1..100,J00194.1:100..202)") {
156 $loc->seq_id("unknown");
158 my @res = @{$testcases{$locstr}};
159 is(ref($loc), $res[0], $res[0]);
160 is($loc->min_start(), $res[1]);
161 is($loc->max_start(), $res[2]);
162 is($loc->start_pos_type(), $res[3]);
163 is($loc->min_end(), $res[4]);
164 is($loc->max_end(), $res[5]);
165 is($loc->end_pos_type(), $res[6]);
166 is($loc->location_type(), $res[7]);
167 my @locs = $loc->each_Location();
169 my $ftstr = $loc->to_FTstring();
170 # this is a somewhat ugly hack, but we want clean output from to_FTstring()
171 # Umm, then these should really fail, correct?
172 # Should we be engineering workarounds for tests?
173 $locstr = "J00194:100..202" if $locstr eq "J00194:(100..202)";
174 $locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
176 is($ftstr, $locstr, "Location String: $locstr");
177 # test strand production
178 is($loc->strand(), $res[9]);
182 skip('nested matches in regex only supported in v5.6.1 and higher', 8) unless $^V gt v5.6.0;
184 # Tests based on location definition (http://www.insdc.org/documents/feature_table.html#3.4)
185 my $string1 = 'complement(join(2691..4571,4918..5163))';
186 my $string2 = 'join(complement(4918..5163),complement(2691..4571))';
187 my $loc1 = $locfac->from_string($string1);
188 my $loc2 = $locfac->from_string($string2);
189 my $loc1_str = $loc1->to_FTstring;
190 my $loc2_str = $loc2->to_FTstring;
191 is($loc1_str, $string1, $string1);
192 is($loc2_str, $string1, $string2);
193 is($loc1_str, $loc2_str, 'equivalent remote location strings');
195 # Test for equivalent reverse strand locations adding one remote component
196 $string1 = 'complement(join(TEST0001.1:2691..4571,4918..5163))';
197 $string2 = 'join(complement(4918..5163),complement(TEST0001.1:2691..4571))';
198 $loc1 = $locfac->from_string($string1);
199 $loc2 = $locfac->from_string($string2);
200 $loc1_str = $loc1->to_FTstring;
201 $loc2_str = $loc2->to_FTstring;
202 is($loc1_str, $string1, $string1);
203 is($loc2_str, $string1, $string2);
204 is($loc1_str, $loc2_str, 'equivalent remote location strings');
206 # Test for equivalent reverse strand locations adding two remote components
207 $string1 = 'complement(join(TEST0001.1:2691..4571,TEST0008.1:4918..5163))';
208 $string2 = 'join(complement(TEST0008.1:4918..5163),complement(TEST0001.1:2691..4571))';
209 $loc1 = $locfac->from_string($string1);
210 $loc2 = $locfac->from_string($string2);
211 $loc1_str = $loc1->to_FTstring;
212 $loc2_str = $loc2->to_FTstring;
213 is($loc1_str, $string1, $string1);
214 is($loc2_str, $string1, $string2);
215 is($loc1_str, $loc2_str, 'equivalent remote location strings');
217 # bug #1674, #1765, 2101
218 # EMBL-like (BAC19856.3 protein)
219 # join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
221 # join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
223 # join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
225 # join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)
227 my @expected = (# intentionally testing same expected string twice
228 # as I am providing two different encodings
229 # that should mean the same thing
230 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
231 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
233 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
234 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
235 # this is just seen once
236 'join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)',
237 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
241 'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
242 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
243 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
244 'join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))',
245 'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
246 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
248 my $loc = $locfac->from_string($locstr);
249 my $ftstr = $loc->to_FTstring();
250 is($ftstr, shift @expected, $locstr);