Bio::Tools::CodonTable and Bio::Tools::IUPAC: use our and drop BEGIN blocks.
[bioperl-live.git] / lib / Bio / Tools / TandemRepeatsFinder.pm
blobb23fbdc588194d61cd35a7b6dc7c52ecc759c6d3
2 =head1 NAME
4 Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
6 =head1 SYNOPSIS
8 use Bio::Tools::TandemRepeatsFinder;
10 # create parser
11 my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
13 # loop through results
14 while( my $feat = $parser->next_result ) {
16 # print the source sequence id, start, end, percent matches, and the consensus sequence
17 my ($percent_matches) = $feat->get_tag_values('percent_matches');
18 my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19 print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
23 =head1 DESCRIPTION
25 A parser for Tandem Repeats Finder output.
26 Written and tested for version 4.00
28 Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature.
29 All other data is stored in tags. The available tags are
31 period_size
32 copy_number
33 consensus_size
34 percent_matches
35 percent_indels
36 percent_a
37 percent_c
38 percent_g
39 percent_t
40 entropy
41 consensus_sequence
42 repeat_sequence
43 run_parameters
44 sequence_description
46 The run_parameters are stored in a hashref with the following keys:
48 match_weight
49 mismatch_weight
50 indel_weight
51 match_prob
52 indel_prob
53 min_score
54 max_period_size
56 =head1 FEEDBACK
58 =head2 Mailing Lists
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 =head2 Support
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 responsive experts will be able to look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
78 =head2 Reporting Bugs
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
82 the web:
84 https://github.com/bioperl/bioperl-live/issues
86 =head1 AUTHOR - Eric Just
88 Email e-just@northwestern.edu
90 =head1 APPENDIX
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
95 =cut
97 package Bio::Tools::TandemRepeatsFinder;
99 use strict;
100 use constant DEBUG => 0;
101 use Bio::SeqFeature::Generic;
103 use base qw(Bio::Root::Root Bio::Root::IO);
=head2 new

 Title   : new
 Usage   : my $obj = Bio::Tools::TandemRepeatsFinder->new(-file => $path);
 Function: Constructs a Bio::Tools::TandemRepeatsFinder parser and
           initialises its input from the supplied arguments
 Returns : Bio::Tools::TandemRepeatsFinder
 Args    : -fh/-file => $val, for initing input, see Bio::Root::IO

=cut

sub new {
    my $class = shift;

    # The parent constructor and the I/O initialiser both receive the
    # complete, unmodified argument list.
    my $self = $class->SUPER::new(@_);
    $self->_initialize_io(@_);

    return $self;
}
=head2 version

 Title   : version
 Usage   : $self->version( $version )
 Function: get/set the version of Tandem Repeats Finder that was used in
           the analysis
 Returns : the stored version (after any update)
 Args    : new value (optional); an undefined value leaves the stored
           version untouched

=cut

sub version {
    my $self = shift;
    my ($new_version) = @_;

    # Only a defined argument counts as a "set" request.
    $self->{'version'} = $new_version if defined $new_version;

    return $self->{'version'};
}
=head2 _current_seq_id

 Title   : _current_seq_id
 Usage   : $self->_current_seq_id( $current_seq_id )
 Function: get/set the identifier of the sequence whose results are
           currently being parsed
 Returns : the stored _current_seq_id (after any update)
 Args    : new value (optional); an undefined value leaves the stored
           identifier untouched

=cut

sub _current_seq_id {
    my $self = shift;
    my ($new_seq_id) = @_;

    # Only a defined argument counts as a "set" request.
    $self->{'_current_seq_id'} = $new_seq_id if defined $new_seq_id;

    return $self->{'_current_seq_id'};
}
=head2 _current_seq_description

 Title   : _current_seq_description
 Usage   : $self->_current_seq_description( $current_seq_description )
 Function: get/set the description of the sequence whose results are
           currently being parsed
 Returns : the stored _current_seq_description (after any update)
 Args    : new value (optional); an undefined value leaves the stored
           description untouched

=cut

sub _current_seq_description {
    my $self = shift;
    my ($new_description) = @_;

    # Only a defined argument counts as a "set" request.
    $self->{'_current_seq_description'} = $new_description
        if defined $new_description;

    return $self->{'_current_seq_description'};
}
=head2 _current_parameters

 Title   : _current_parameters
 Usage   : $self->_current_parameters( $parameters_hashref )
 Function: get/set the run parameters parsed from the results file
 Returns : hashref representing the current parameters; its keys are
             match_weight
             mismatch_weight
             indel_weight
             match_prob
             indel_prob
             min_score
             max_period_size
 Args    : parameters hashref (optional); an undefined value leaves the
           stored hashref untouched

=cut

sub _current_parameters {
    my $self = shift;
    my ($new_parameters) = @_;

    # Only a defined argument counts as a "set" request. The reference is
    # stored as given, not copied.
    $self->{'_current_parameters'} = $new_parameters
        if defined $new_parameters;

    return $self->{'_current_parameters'};
}
=head2 next_result

 Title   : next_result
 Usage   : while ( my $feat = $trf->next_result() ) { ... }
 Function: Reads forward through the results until the next repeat data
           line is found. Version, Sequence and Parameters lines met on
           the way are recorded on the parser so they can be attached to
           the features that follow them.
 Returns : Bio::SeqFeature::Generic for the next repeat, or nothing
           (undef in scalar context) once the input is exhausted
 Args    : none

=cut

sub next_result {
    my ($self) = @_;

    # The current line is kept in a lexical. Assigning to the global $_
    # would clobber the caller's $_ and, when called from inside a
    # caller's foreach, overwrite the element $_ is aliased to.
    while ( defined( my $line = $self->_readline() ) ) {

        # Parse Version line
        if ( $line =~ /^Version (.+)/ ) {
            my $version = $1;
            $self->warn("parsed version: $version\n") if DEBUG;
            $self->warn(
                "Bio::Tools::TandemRepeatsFinder was written and tested for "
              . "Tandem Repeats Finder Version 4.00 output.\n"
              . "You appear to be using Version $version. Use at your own risk."
            ) if ( $version != 4 );
            $self->version($version);
        }

        # Parse Sequence identifier
        # i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f
        elsif ( $line =~ /^Sequence: ([^\s]+)\s(.+)?/ ) {
            my $seq_id          = $1;
            my $seq_description = $2;    # undef when the line has no description
            $self->warn("parsed sequence_id: $seq_id\n") if DEBUG;
            $self->_current_seq_id($seq_id);

            # Written directly to the slot the accessor uses: the accessor
            # ignores undef, which would let the previous sequence's
            # description leak onto a sequence that has none.
            $self->{'_current_seq_description'} = $seq_description;
        }

        # Parse Parameters
        # i.e. Parameters: 2 7 7 80 10 50 12
        elsif ( $line =~ /^Parameters: (.+)/ ) {
            my $params = $1;
            $self->warn("parsed parameters: $params\n") if DEBUG;

            # split ' ' treats any run of whitespace as one separator, so
            # repeated spaces cannot shift values into the wrong key.
            my @param_array = split ' ', $params;

            my $param_hash = {
                match_weight    => $param_array[0],
                mismatch_weight => $param_array[1],
                indel_weight    => $param_array[2],
                match_prob      => $param_array[3],
                indel_prob      => $param_array[4],
                min_score       => $param_array[5],
                max_period_size => $param_array[6]
            };
            $self->_current_parameters($param_hash);
        }

        # Parse Data
        # i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
        elsif ( $line =~ /^\d+\s\d+\s\d+/ ) {

            # call internal method to create Bio::SeqFeature::Generic
            # to represent tandem repeat
            return $self->_create_feature($line);
        }

        elsif (DEBUG) {
            $self->warn( "UNPARSED LINE:\n" . $line );
        }
    }

    # Input exhausted without finding another data line.
    return;
}
=head2 _create_feature

 Title   : _create_feature
 Usage   : internal method used by 'next_result'
 Function: Takes a data line from the results file and creates a bioperl
           object. Location, seq_id and score are set on the feature;
           every other column, plus the current run parameters and
           sequence description, is stored as a tag.
 Returns : Bio::SeqFeature::Generic
 Args    : a single data line, i.e.
           13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT

=cut

sub _create_feature {
    my ( $self, $line ) = @_;

    # Split the line straight into named variables. split ' ' ignores
    # leading whitespace and treats any run of whitespace as one
    # separator, so consecutive blanks cannot introduce empty fields that
    # would shift every later column.
    my (
        $start,          $end,                $period_size,
        $copy_number,    $consensus_size,     $percent_matches,
        $percent_indels, $score,              $percent_a,
        $percent_c,      $percent_g,          $percent_t,
        $entropy,        $consensus_sequence, $repeat_sequence
    ) = split ' ', $line;

    # create tag hash from data in line
    my $tags = {
        period_size          => $period_size,
        copy_number          => $copy_number,
        consensus_size       => $consensus_size,
        percent_matches      => $percent_matches,
        percent_indels       => $percent_indels,
        percent_a            => $percent_a,
        percent_c            => $percent_c,
        percent_g            => $percent_g,
        percent_t            => $percent_t,
        entropy              => $entropy,
        consensus_sequence   => $consensus_sequence,
        repeat_sequence      => $repeat_sequence,
        run_parameters       => $self->_current_parameters(),
        sequence_description => $self->_current_seq_description()
    };

    # create feature from start/end etc
    my $feat = Bio::SeqFeature::Generic->new(
        -seq_id      => $self->_current_seq_id(),
        -score       => $score,
        -start       => $start,
        -end         => $end,
        -source_tag  => 'Tandem Repeats Finder',
        -primary_tag => 'tandem repeat',
        -tag         => $tags
    );

    return $feat;
}