4 Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
8 use Bio::Tools::TandemRepeatsFinder;
11 my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
13 # loop through results
14 while( my $feat = $parser->next_result ) {
16 # print the source sequence id, start, end, percent matches, and the consensus sequence
17 my ($percent_matches) = $feat->get_tag_values('percent_matches');
18 my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19 print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
25 A parser for Tandem Repeats Finder output.
26 Written and tested for version 4.00
28 Location, seq_id, and score are stored in a Bio::SeqFeature::Generic feature.
29 All other data is stored in tags. The available tags are
46 The run_parameters are stored in a hashref with the following keys:
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 responsive experts will be able to look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
84 https://github.com/bioperl/bioperl-live/issues
86 =head1 AUTHOR - Eric Just
88 Email e-just@northwestern.edu
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
97 package Bio
::Tools
::TandemRepeatsFinder
;
100 use constant DEBUG
=> 0;
101 use Bio
::SeqFeature
::Generic
;
103 use base
qw(Bio::Root::Root Bio::Root::IO);
108 Usage : my $obj = Bio::Tools::TandemRepeatsFinder->new();
109 Function: Builds a new Bio::Tools::TandemRepeatsFinder object
110 Returns : Bio::Tools::TandemRepeatsFinder
111 Args : -fh/-file => $val, for initing input, see Bio::Root::IO
# Constructor.
#
# Builds the object through the parent-class constructor and then passes the
# very same argument list to _initialize_io(), which is where the documented
# -fh/-file arguments are consumed.
#
# Args    : $class - class the constructor is invoked on
#           @args  - named arguments, forwarded unchanged to SUPER::new and
#                    to _initialize_io
# Returns : the newly constructed object
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new(@args);
    $self->_initialize_io(@args);

    return $self;
}
127 Usage : $self->version( $version )
128 Function: get/set the version of Tandem Repeats finder that was used in analysis
129 Returns : value of version
130 Args : new value (optional)
# Get/set accessor for the Tandem Repeats Finder version recorded for the
# analysis being parsed.
#
# Args    : $value - new version (optional)
# Returns : the value currently stored under the 'version' key
sub version {
    my ( $self, $value ) = @_;

    # Only a defined argument overwrites the stored value, so calling the
    # accessor with no argument (or with undef) is a pure read.
    if ( defined $value ) {
        $self->{'version'} = $value;
    }

    return $self->{'version'};
}
142 =head2 _current_seq_id
144 Title : _current_seq_id
145 Usage : $self->_current_seq_id( $current_seq_id )
146 Function: get/set the _current_seq_id
147 Returns : value of _current_seq_id
148 Args : new value (optional)
# Internal get/set accessor for the identifier of the sequence whose results
# are currently being parsed.
#
# Args    : $value - new sequence id (optional)
# Returns : the value currently stored under the '_current_seq_id' key
sub _current_seq_id {
    my ( $self, $value ) = @_;

    # Only a defined argument overwrites the stored value, so calling the
    # accessor with no argument (or with undef) is a pure read.
    if ( defined $value ) {
        $self->{'_current_seq_id'} = $value;
    }

    return $self->{'_current_seq_id'};
}
160 =head2 _current_seq_description
162 Title : _current_seq_description
163 Usage : $self->_current_seq_description( $current_seq_description )
164 Function: get/set the _current_seq_description
165 Returns : value of _current_seq_description
166 Args : new value (optional)
# Internal get/set accessor for the free-text description of the sequence
# whose results are currently being parsed.
#
# Args    : $value - new description (optional)
# Returns : the value currently stored under the '_current_seq_description' key
sub _current_seq_description {
    my ( $self, $value ) = @_;

    # Only a defined argument overwrites the stored value; a call with undef
    # therefore keeps whatever description was stored before.
    if ( defined $value ) {
        $self->{'_current_seq_description'} = $value;
    }

    return $self->{'_current_seq_description'};
}
178 =head2 _current_parameters
180 Title : _current_parameters
181 Usage : $self->_current_parameters( $parameters_hashref )
182 Function: get/set the _current_parameters
183 Returns : hashref representing current parameters parsed from results file
192 Args : parameters hashref (optional)
# Internal get/set accessor for the run parameters parsed from the results
# file.
#
# Args    : $value - parameters hashref (optional)
# Returns : the value currently stored under the '_current_parameters' key
sub _current_parameters {
    my ( $self, $value ) = @_;

    # Only a defined argument overwrites the stored value. The reference is
    # stored as given, not copied.
    if ( defined $value ) {
        $self->{'_current_parameters'} = $value;
    }

    return $self->{'_current_parameters'};
}
207 Usage : my $r = $trf->next_result()
208 Function: Get the next result set from parser data
209 Returns : Bio::SeqFeature::Generic
# Main parsing loop: pulls one line at a time from $self->_readline() into $_
# and stops when _readline() returns undef. Each line is matched against the
# patterns below; only a data line makes the method return.
# NOTE(review): $_ is assigned without `local`, so the caller's $_ is
# overwritten while this loop runs - confirm whether that is intended.
216 while ( defined( $_ = $self->_readline() ) ) {
# "Version ..." line: the captured text is used as $version (its declaration
# is not visible in this excerpt) and stored through version().
# NOTE(review): `!=` is a numeric comparison, so a non-numeric version string
# would be compared as a number; the message text below also looks like it
# contains typos ("Masker" for "Finder", "Verion" for "Version") - confirm
# before changing, since it is runtime output.
219 if (/^Version (.+)/) {
221 $self->warn("parsed version: $version\n") if DEBUG
;
222 $self->warn( qq{ Bio
::Tools
::TandemRepeatsFinder was written
and tested
for Tandem Repeats Masker Version
4.00 output
223 You appear to be using Verion
$version. Use at your own risk
.}) if ($version != 4);
224 $self->version($version);
227 # Parse Sequence identifier
228 # i.e. Sequence: DDB0215018 |Masked Chromosomal Sequence| Chr 2f
# The pattern needs one whitespace character after the identifier; the
# description group is optional, so $2 may be undef for an id-only line.
229 elsif ( /^Sequence: ([^\s]+)\s(.+)?/ ) {
231 my $seq_description = $2;
232 $self->warn("parsed sequence_id: $seq_id\n") if DEBUG
;
# Remember id and description so they can be attached to later features.
233 $self->_current_seq_id($seq_id);
234 $self->_current_seq_description($seq_description);
238 # i.e. Parameters: 2 7 7 80 10 50 12
239 elsif (/^Parameters: (.+)/) {
241 $self->warn("parsed parameters: $params\n") if DEBUG
;
# Split on single whitespace characters; the values are then mapped by
# position (0..6) onto the named run-parameter keys below.
243 my @param_array = split /\s/, $params;
246 match_weight
=> $param_array[0],
247 mismatch_weight
=> $param_array[1],
248 indel_weight
=> $param_array[2],
249 match_prob
=> $param_array[3],
250 indel_prob
=> $param_array[4],
251 min_score
=> $param_array[5],
252 max_period_size
=> $param_array[6]
254 $self->_current_parameters($param_hash);
258 # i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
# A data line is recognised by three leading integers, each followed by a
# single whitespace character.
259 elsif (/^\d+\s\d+\s\d+/) {
261 # call internal method to create Bio::SeqFeature::Generic
262 # to represent tandem repeat
263 return $self->_create_feature($_);
# Any line that reaches this point is reported verbatim.
# NOTE(review): the branch/guard enclosing this warning is not visible in
# this excerpt - confirm under which condition it fires.
267 $self->warn( "UNPARSED LINE:\n" . $_ );
273 =head2 _create_feature
275 Title : _create_feature
276 Usage : internal method used by 'next_result'
277 Function: Takes a line from the results file and creates a bioperl object
278 Returns : Bio::SeqFeature::Generic
283 sub _create_feature
{
284 my ( $self, $line ) = @_;
286 # split the line and store into named variables
287 my @element = split /\s/, $line;
289 $start, $end, $period_size,
290 $copy_number, $consensus_size, $percent_matches,
291 $percent_indels, $score, $percent_a,
292 $percent_c, $percent_g, $percent_t,
293 $entropy, $consensus_sequence, $repeat_sequence
296 # create tag hash from data in line
298 period_size
=> $period_size,
299 copy_number
=> $copy_number,
300 consensus_size
=> $consensus_size,
301 percent_matches
=> $percent_matches,
302 percent_indels
=> $percent_indels,
303 percent_a
=> $percent_a,
304 percent_c
=> $percent_c,
305 percent_g
=> $percent_g,
306 percent_t
=> $percent_t,
308 consensus_sequence
=> $consensus_sequence,
309 repeat_sequence
=> $repeat_sequence,
310 run_parameters
=> $self->_current_parameters(),
311 sequence_description
=> $self->_current_seq_description()
314 # create feature from start/end etc
315 my $feat = Bio
::SeqFeature
::Generic
->new(
316 -seq_id
=> $self->_current_seq_id(),
320 -source_tag
=> 'Tandem Repeats Finder',
321 -primary_tag
=> 'tandem repeat',