A test to ensure Bio::PrimarySeqI->trunc() doesn't use clone() for a Bio::Seq::RichSe...
[bioperl-live.git] / Bio / Tools / TandemRepeatsFinder.pm
blob3d45852919738e8f9907b6e83056de718cf89724
2 =head1 NAME
4 Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
6 =head1 SYNOPSIS
8 use Bio::Tools::TandemRepeatsFinder;
10 # create parser
11 my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
13 # loop through results
14 while( my $feat = $parser->next_result ) {
16 # print the source sequence id, start, end, percent matches, and the consensus sequence
17 my ($percent_matches) = $feat->get_tag_values('percent_matches');
18 my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19 print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
23 =head1 DESCRIPTION
25 A parser for Tandem Repeats Finder output.
26 Written and tested for version 4.00
28 Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature.
29 All other data is stored in tags. The available tags are
31 period_size
32 copy_number
33 consensus_size
34 percent_matches
35 percent_indels
36 percent_a
37 percent_c
38 percent_g
39 percent_t
40 entropy
41 consensus_sequence
42 repeat_sequence
43 run_parameters
44 sequence_description
46 The run_parameters are stored in a hashref with the following keys:
48 match_weight
49 mismatch_weight
50 indel_weight
51 match_prob
52 indel_prob
53 min_score
54 max_period_size
56 =head1 FEEDBACK
58 =head2 Mailing Lists
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 =head2 Support
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 responsive experts will be able to look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
78 =head2 Reporting Bugs
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
82 the web:
84 https://github.com/bioperl/bioperl-live/issues
86 =head1 AUTHOR - Eric Just
88 Email e-just@northwestern.edu
90 =head1 APPENDIX
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
95 =cut
97 package Bio::Tools::TandemRepeatsFinder;
98 use strict;
99 use constant DEBUG => 0;
100 use Bio::SeqFeature::Generic;
102 use base qw(Bio::Root::Root Bio::Root::IO);
=head2 new

 Title   : new
 Usage   : my $obj = Bio::Tools::TandemRepeatsFinder->new();
 Function: Builds a new Bio::Tools::TandemRepeatsFinder object
 Returns : Bio::Tools::TandemRepeatsFinder
 Args    : -fh/-file => $val, for initing input, see Bio::Root::IO

=cut

sub new {
    my ( $class, @ctor_args ) = @_;

    # Construct the object through the parent class, then pass the very
    # same argument list on to the I/O initialiser.
    my $parser = $class->SUPER::new(@ctor_args);
    $parser->_initialize_io(@ctor_args);

    return $parser;
}
=head2 version

 Title   : version
 Usage   : $self->version( $version )
 Function: get/set the version of Tandem Repeats Finder that was used in analysis
 Returns : the currently stored version
 Args    : new value (optional)

=cut

sub version {
    my $self = shift;

    # Only a defined argument overwrites the stored value; calling with no
    # argument (or with undef) is a plain read.
    $self->{'version'} = $_[0] if defined $_[0];

    return $self->{'version'};
}
=head2 _current_seq_id

 Title   : _current_seq_id
 Usage   : $self->_current_seq_id( $current_seq_id )
 Function: get/set the _current_seq_id
 Returns : value of _current_seq_id
 Args    : new value (optional)

=cut

sub _current_seq_id {
    my ( $self, $new_id ) = @_;

    # Pure read when no defined value is supplied.
    return $self->{'_current_seq_id'} unless defined $new_id;

    $self->{'_current_seq_id'} = $new_id;
    return $self->{'_current_seq_id'};
}
=head2 _current_seq_description

 Title   : _current_seq_description
 Usage   : $self->_current_seq_description( $current_seq_description )
 Function: get/set the _current_seq_description
 Returns : value of _current_seq_description
 Args    : new value (optional)

=cut

sub _current_seq_description {
    my ( $self, $description ) = @_;
    my $slot = '_current_seq_description';

    # An undefined argument never overwrites what is already stored.
    $self->{$slot} = $description if defined $description;

    return $self->{$slot};
}
=head2 _current_parameters

 Title   : _current_parameters
 Usage   : $self->_current_parameters( $parameters_hashref )
 Function: get/set the _current_parameters
 Returns : hashref representing current parameters parsed from results file
         : keys are
              match_weight
              mismatch_weight
              indel_weight
              match_prob
              indel_prob
              min_score
              max_period_size
 Args    : parameters hashref (optional)

=cut

sub _current_parameters {
    my ( $self, $params ) = @_;

    # Read access: nothing (or undef) was passed in.
    if ( !defined $params ) {
        return $self->{'_current_parameters'};
    }

    # Write access: remember the new value and hand it back.
    $self->{'_current_parameters'} = $params;
    return $params;
}
=head2 next_result

 Title   : next_result
 Usage   : my $r = $trf->next_result()
 Function: Get the next result set from parser data.  Version, Sequence
           and Parameters header lines met on the way are recorded on the
           parser and applied to the features that follow them.
 Returns : Bio::SeqFeature::Generic, or nothing when the input is exhausted
 Args    : none

=cut

sub next_result {
    my ($self) = @_;

    # Read into a lexical instead of the global $_ so that the caller's $_
    # is not overwritten while we scan the file.
    while ( defined( my $line = $self->_readline() ) ) {

        # Parse Version line
        if ( $line =~ /^Version (.+)/ ) {
            my $version = $1;
            $self->warn("parsed version: $version\n") if DEBUG;
            $self->warn(
                    "Bio::Tools::TandemRepeatsFinder was written and tested for "
                  . "Tandem Repeats Finder Version 4.00 output\n"
                  . "You appear to be using Version $version. Use at your own risk."
            ) if ( $version != 4 );
            $self->version($version);
        }

        # Parse Sequence identifier
        # i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f
        elsif ( $line =~ /^Sequence: ([^\s]+)\s(.+)?/ ) {
            my ( $seq_id, $seq_description ) = ( $1, $2 );
            $self->warn("parsed sequence_id: $seq_id\n") if DEBUG;
            $self->_current_seq_id($seq_id);

            # Store the description directly: the accessor ignores undef,
            # which would let the description of the previous sequence
            # leak onto a sequence that has none.
            $self->{'_current_seq_description'} = $seq_description;
        }

        # Parse Parameters
        # i.e. Parameters: 2 7 7 80 10 50 12
        elsif ( $line =~ /^Parameters: (.+)/ ) {
            my $params = $1;
            $self->warn("parsed parameters: $params\n") if DEBUG;

            # split ' ' treats any run of whitespace as one separator, so
            # extra blanks between values cannot shift the columns.
            my %param_hash;
            @param_hash{
                qw(
                  match_weight mismatch_weight indel_weight
                  match_prob   indel_prob      min_score
                  max_period_size
                  )
            } = split ' ', $params;

            $self->_current_parameters( \%param_hash );
        }

        # Parse Data
        # i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
        elsif ( $line =~ /^\d+\s+\d+\s+\d+/ ) {

            # call internal method to create Bio::SeqFeature::Generic
            # to represent tandem repeat
            return $self->_create_feature($line);
        }

        elsif (DEBUG) {
            $self->warn( "UNPARSED LINE:\n" . $line );
        }
    }

    # Input exhausted without finding another data line.
    return;
}
=head2 _create_feature

 Title   : _create_feature
 Usage   : internal method used by 'next_result'
 Function: Takes a data line from the results file and creates a bioperl
           object from it, attaching the sequence id, sequence description
           and run parameters currently stored on the parser
 Returns : Bio::SeqFeature::Generic
 Args    : a whitespace-separated data line, i.e.
           13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT

=cut

sub _create_feature {
    my ( $self, $line ) = @_;

    # Split the line into named columns.  split ' ' skips leading
    # whitespace and treats every run of whitespace (including the
    # trailing newline) as a single separator, so extra blanks between
    # columns cannot shift values into the wrong variable.
    my (
        $start,          $end,                $period_size,
        $copy_number,    $consensus_size,     $percent_matches,
        $percent_indels, $score,              $percent_a,
        $percent_c,      $percent_g,          $percent_t,
        $entropy,        $consensus_sequence, $repeat_sequence
    ) = split ' ', $line;

    # create tag hash from data in line; everything that is not location,
    # seq_id or score travels as a tag
    my $tags = {
        period_size          => $period_size,
        copy_number          => $copy_number,
        consensus_size       => $consensus_size,
        percent_matches      => $percent_matches,
        percent_indels       => $percent_indels,
        percent_a            => $percent_a,
        percent_c            => $percent_c,
        percent_g            => $percent_g,
        percent_t            => $percent_t,
        entropy              => $entropy,
        consensus_sequence   => $consensus_sequence,
        repeat_sequence      => $repeat_sequence,
        run_parameters       => $self->_current_parameters(),
        sequence_description => $self->_current_seq_description()
    };

    # create feature from start/end etc
    my $feat = Bio::SeqFeature::Generic->new(
        -seq_id      => $self->_current_seq_id(),
        -score       => $score,
        -start       => $start,
        -end         => $end,
        -source_tag  => 'Tandem Repeats Finder',
        -primary_tag => 'tandem repeat',
        -tag         => $tags
    );

    return $feat;
}