sync w/ main trunk
[bioperl-live.git] / Bio / Coordinate / Utils.pm
blob10817ea1699218d6e3be64655b55f2935d2e9d68
1 # $Id$
3 # BioPerl module for Bio::Coordinate::Utils
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Heikki Lehvaslaiho
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Coordinate::Utils - Additional methods to create Bio::Coordinate objects
19 =head1 SYNOPSIS
21 use Bio::Coordinate::Utils;
22 # get a Bio::Align::AlignI compliant object, $aln, somehow
23 # it could be a Bio::SimpleAlign
25 $mapper = Bio::Coordinate::Utils->from_align($aln, 1);
27 # Build a set of mappers which will map, for each sequence,
28 # that sequence position in the alignment (exon position to alignment
29 # position)
30 my @mappers = Bio::Coordinate::Utils->from_seq_to_alignmentpos($aln);
33 =head1 DESCRIPTION
35 This class is a holder of methods that work on or create
36 Bio::Coordinate::MapperI- compliant objects. . These methods are not
37 part of the Bio::Coordinate::MapperI interface and should in general
38 not be essential to the primary function of sequence objects. If you
39 are thinking of adding essential functions, it might be better to
40 create your own sequence class. See L<Bio::PrimarySeqI>,
41 L<Bio::PrimarySeq>, and L<Bio::Seq> for more.
43 =head1 FEEDBACK
45 =head2 Mailing Lists
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to one
49 of the Bioperl mailing lists. Your participation is much appreciated.
51 bioperl-l@bioperl.org - General discussion
52 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 =head2 Support
56 Please direct usage questions or support issues to the mailing list:
58 L<bioperl-l@bioperl.org>
60 rather than to the module maintainer directly. Many experienced and
61 reponsive experts will be able look at the problem and quickly
62 address it. Please include a thorough description of the problem
63 with code and data examples if at all possible.
65 =head2 Reporting Bugs
67 Report bugs to the Bioperl bug tracking system to help us keep track
68 the bugs and their resolution. Bug reports can be submitted via the
69 web:
71 http://bugzilla.open-bio.org/
73 =head1 AUTHOR - Heikki Lehvaslaiho
75 Email: heikki-at-bioperl-dot-org
77 Jason Stajich jason at bioperl.org
79 =head1 APPENDIX
81 The rest of the documentation details each of the object
82 methods. Internal methods are usually preceded with a _
84 =cut
87 # Let the code begin...
90 package Bio::Coordinate::Utils;
92 use Bio::Location::Simple;
93 use Bio::Coordinate::Pair;
94 use Bio::Coordinate::Collection;
96 use strict;
98 use base qw(Bio::Root::Root);
99 # new inherited from Root
101 =head2 from_align
103 Title : from_align
104 Usage : $mapper = Bio::Coordinate::Utils->from_align($aln, 1);
105 Function:
106 Create a mapper out of an alignment.
107 The mapper will return a value only when both ends of
108 the input range find a match.
110 Note: This implementation works only on pairwise alignments
111 and is not yet well tested!
113 Returns : A Bio::Coordinate::MapperI
114 Args : Bio::Align::AlignI object
115 Id for the reference sequence, optional
117 =cut
119 sub from_align {
120 my ($self, $aln, $ref ) = @_;
122 $aln->isa('Bio::Align::AlignI') ||
123 $self->throw('Not a Bio::Align::AlignI object but ['. ref($aln). ']');
125 # default reference sequence to the first sequence
126 $ref ||= 1;
128 my $collection = Bio::Coordinate::Collection->new(-return_match=>1);
130 # this works only for pairs, so split the MSA
131 # take the ref
132 #foreach remaining seq in aln, do:
133 $aln->map_chars('\.','-');
134 my $cs = $aln->gap_line;
135 my $seq1 = $aln->get_seq_by_pos(1);
136 my $seq2 = $aln->get_seq_by_pos(2);
137 while ( $cs =~ /([^\-]+)/g) {
138 # alignment coordinates
139 my $lenmatch = length($1);
140 my $start = pos($cs) - $lenmatch +1;
141 my $end = $start + $lenmatch -1;
142 my $match1 = Bio::Location::Simple->new
143 (-seq_id => $seq1->id,
144 -start => $seq1->location_from_column($start)->start,
145 -end => $seq1->location_from_column($end)->start,
146 -strand => $seq1->strand );
148 my $match2 = Bio::Location::Simple->new
149 (-seq_id => $seq2->id,
150 -start => $seq2->location_from_column($start)->start,
151 -end => $seq2->location_from_column($end)->start,
152 -strand => $seq2->strand );
154 my $pair = Bio::Coordinate::Pair->new
155 (-in => $match1,
156 -out => $match2
158 unless( $pair->test ) {
159 $self->warn(join("",
160 "pair align did not pass test ($start..$end):\n",
161 "\tm1=",$match1->to_FTstring(), " len=",
162 $match1->length,
163 " m2=", $match2->to_FTstring()," len=",
164 $match2->length,"\n"));
166 $collection->add_mapper($pair);
168 return ($collection->each_mapper)[0] if $collection->mapper_count == 1;
169 return $collection;
173 =head2 from_seq_to_alignmentpos
175 Title : from_seq_to_alignmentpos
176 Usage : $mapper = Bio::Coordinate::Utils->from_seq_to_alignmentpos($aln, 1);
177 Function:
178 Create a mapper out of an alignment.
179 The mapper will map the position of a sequence into that position
180 in the alignment.
182 Will work on alignments of >= 2 sequences
183 Returns : An array of Bio::Coordinate::MapperI
184 Args : Bio::Align::AlignI object
186 =cut
189 sub from_seq_to_alignmentpos {
190 my ($self, $aln ) = @_;
192 $aln->isa('Bio::Align::AlignI') ||
193 $self->throw('Not a Bio::Align::AlignI object but ['. ref($aln). ']');
195 # default reference sequence to the first sequence
196 my @mappers;
197 $aln->map_chars('\.','-');
198 for my $seq ( $aln->each_seq ) {
199 my $collection = Bio::Coordinate::Collection->new(-return_match=>1);
200 my $cs = $seq->seq();
201 # do we change this over to use index and substr for speed?
202 while ( $cs =~ /([^\-]+)/g) {
203 # alignment coordinates
204 my $lenmatch = length($1);
205 my $start = pos($cs) - $lenmatch +1;
206 my $end = $start + $lenmatch -1;
208 my $match1 = Bio::Location::Simple->new
209 (-seq_id => $seq->id,
210 -start => $seq->location_from_column($start)->start,
211 -end => $seq->location_from_column($end)->start,
212 -strand => $seq->strand );
214 my $match2 = Bio::Location::Simple->new
215 (-seq_id => 'alignment',
216 -start => $start,
217 -end => $end,
218 -strand => 0 );
220 my $pair = Bio::Coordinate::Pair->new
221 (-in => $match1,
222 -out => $match2
224 unless ( $pair->test ) {
225 $self->warn(join("",
226 "pair align did not pass test ($start..$end):\n",
227 "\tm1=",$match1->to_FTstring(), " len=",
228 $match1->length,
229 " m2=", $match2->to_FTstring()," len=",
230 $match2->length,"\n"));
232 $collection->add_mapper($pair);
234 if( $collection->mapper_count == 1) {
235 push @mappers, ($collection->each_mapper)[0];
236 } else {
237 push @mappers, $collection;
240 return @mappers;