4 Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
8 use Bio::Tools::TandemRepeatsFinder;
11 my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
13 # loop through results
14 while( my $feat = $parser->next_result ) {
16 # print the source sequence id, start, end, percent matches, and the consensus sequence
17 my ($percent_matches) = $feat->get_tag_values('percent_matches');
18 my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19 print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
25 A parser for Tandem Repeats Finder output.
26 Written and tested for version 4.00
28 Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature.
29 All other data is stored in tags. The available tags are
46 The run_parameters are stored in a hashref with the following keys:
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 responsive experts will be able to look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
84 https://github.com/bioperl/bioperl-live/issues
86 =head1 AUTHOR - Eric Just
88 Email e-just@northwestern.edu
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
97 package Bio
::Tools
::TandemRepeatsFinder
;
99 use constant DEBUG
=> 0;
100 use Bio
::SeqFeature
::Generic
;
102 use base
qw(Bio::Root::Root Bio::Root::IO);
107 Usage : my $obj = Bio::Tools::TandemRepeatsFinder->new();
108 Function: Builds a new Bio::Tools::TandemRepeatsFinder object
109 Returns : Bio::Tools::TandemRepeatsFinder
110 Args : -fh/-file => $val, for initing input, see Bio::Root::IO
115 my ( $class, @args ) = @_;
117 my $self = $class->SUPER::new
(@args);
118 $self->_initialize_io(@args);
126 Usage : $self->version( $version )
127 Function: get/set the version of Tandem Repeats Finder that was used in analysis
128 Returns : value of version
129 Args : new value (optional)
134 my ( $self, $value ) = @_;
135 if ( defined $value ) {
136 $self->{'version'} = $value;
138 return $self->{'version'};
141 =head2 _current_seq_id
143 Title : _current_seq_id
144 Usage : $self->_current_seq_id( $current_seq_id )
145 Function: get/set the _current_seq_id
146 Returns : value of _current_seq_id
147 Args : new value (optional)
151 sub _current_seq_id
{
152 my ( $self, $value ) = @_;
153 if ( defined $value ) {
154 $self->{'_current_seq_id'} = $value;
156 return $self->{'_current_seq_id'};
159 =head2 _current_seq_description
161 Title : _current_seq_description
162 Usage : $self->_current_seq_description( $current_seq_description )
163 Function: get/set the _current_seq_description
164 Returns : value of _current_seq_description
165 Args : new value (optional)
169 sub _current_seq_description
{
170 my ( $self, $value ) = @_;
171 if ( defined $value ) {
172 $self->{'_current_seq_description'} = $value;
174 return $self->{'_current_seq_description'};
177 =head2 _current_parameters
179 Title : _current_parameters
180 Usage : $self->_current_parameters( $parameters_hashref )
181 Function: get/set the _current_parameters
182 Returns : hashref representing current parameters parsed from results file
191 Args : parameters hashref (optional)
195 sub _current_parameters
{
196 my ( $self, $value ) = @_;
197 if ( defined $value ) {
198 $self->{'_current_parameters'} = $value;
200 return $self->{'_current_parameters'};
206 Usage : my $r = $trf->next_result()
207 Function: Get the next result set from parser data
208 Returns : Bio::SeqFeature::Generic
215 while ( defined( $_ = $self->_readline() ) ) {
218 if (/^Version (.+)/) {
220 $self->warn("parsed version: $version\n") if DEBUG
;
221 $self->warn( qq{ Bio
::Tools
::TandemRepeatsFinder was written
and tested
for Tandem Repeats Masker Version
4.00 output
222 You appear to be using Verion
$version. Use at your own risk
.}) if ($version != 4);
223 $self->version($version);
226 # Parse Sequence identifier
227 # i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f
228 elsif ( /^Sequence: ([^\s]+)\s(.+)?/ ) {
230 my $seq_description = $2;
231 $self->warn("parsed sequence_id: $seq_id\n") if DEBUG
;
232 $self->_current_seq_id($seq_id);
233 $self->_current_seq_description($seq_description);
237 # i.e. Parameters: 2 7 7 80 10 50 12
238 elsif (/^Parameters: (.+)/) {
240 $self->warn("parsed parameters: $params\n") if DEBUG
;
242 my @param_array = split /\s/, $params;
245 match_weight
=> $param_array[0],
246 mismatch_weight
=> $param_array[1],
247 indel_weight
=> $param_array[2],
248 match_prob
=> $param_array[3],
249 indel_prob
=> $param_array[4],
250 min_score
=> $param_array[5],
251 max_period_size
=> $param_array[6]
253 $self->_current_parameters($param_hash);
257 # i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
258 elsif (/^\d+\s\d+\s\d+/) {
260 # call internal method to create Bio::SeqFeature::Generic
261 # to represent tandem repeat
262 return $self->_create_feature($_);
266 $self->warn( "UNPARSED LINE:\n" . $_ );
272 =head2 _create_feature
274 Title : _create_feature
275 Usage : internal method used by 'next_result'
276 Function: Takes a line from the results file and creates a bioperl object
277 Returns : Bio::SeqFeature::Generic
282 sub _create_feature
{
283 my ( $self, $line ) = @_;
285 # split the line and store into named variables
286 my @element = split /\s/, $line;
288 $start, $end, $period_size,
289 $copy_number, $consensus_size, $percent_matches,
290 $percent_indels, $score, $percent_a,
291 $percent_c, $percent_g, $percent_t,
292 $entropy, $consensus_sequence, $repeat_sequence
295 # create tag hash from data in line
297 period_size
=> $period_size,
298 copy_number
=> $copy_number,
299 consensus_size
=> $consensus_size,
300 percent_matches
=> $percent_matches,
301 percent_indels
=> $percent_indels,
302 percent_a
=> $percent_a,
303 percent_c
=> $percent_c,
304 percent_g
=> $percent_g,
305 percent_t
=> $percent_t,
307 consensus_sequence
=> $consensus_sequence,
308 repeat_sequence
=> $repeat_sequence,
309 run_parameters
=> $self->_current_parameters(),
310 sequence_description
=> $self->_current_seq_description()
313 # create feature from start/end etc
314 my $feat = Bio
::SeqFeature
::Generic
->new(
315 -seq_id
=> $self->_current_seq_id(),
319 -source_tag
=> 'Tandem Repeats Finder',
320 -primary_tag
=> 'tandem repeat',