branch-1-6/Bio/Tools/Seg.pm

   1 # $Id$
   2 #
   3 # BioPerl module for Bio::Tools::Seg
   4 #
   5 # Copyright Balamurugan Kumarasamy
   6 # Totally re-written, added docs and tests -- Torsten Seemann, Sep 2006
   7 #
   8 # Copyright
   9 # You may distribute this module under the same terms as perl itself
  10 #
  11 # POD documentation - main docs before the code
  12
  13 =head1 NAME
  14
  15 Bio::Tools::Seg - parse C<seg> output
  16
  17 =head1 SYNOPSIS
  18
  19   use Bio::Tools::Seg;
  20   my $parser = Bio::Tools::Seg->new(-file => 'seg.fasta');
  21   while ( my $f = $parser->next_result ) {
  22     if ($f->score < 1.5) {
  23       print $f->location->to_FTstring, " is low complexity\n";
  24     }
  25   }
  26
  27 =head1 DESCRIPTION
  28
  29 C<seg> identifies low-complexity regions on a protein sequence.
  30 It is usually part of the C<WU-BLAST> and C<InterProScan> packages.
  31
  32 The L<Bio::Tools::Seg> module will only parse the "fasta" output
  33 modes of C<seg>, i.e. C<seg -l> (low complexity regions only),
  34 C<seg -h> (high complexity regions only), or C<seg -a> (both low
  35 and high).
  36
  37 It creates a L<Bio::SeqFeature::Generic> for each FASTA-like entry
  38 found in the input file. It is up to the user to appropriately filter
  39 these using the feature's score.
  40
  41 =head1 FEEDBACK
  42
  43 =head2 Mailing Lists
  44
  45 User feedback is an integral part of the evolution of this and other
  46 Bioperl modules. Send your comments and suggestions preferably to
  47 the Bioperl mailing list.  Your participation is much appreciated.
  48
  49   bioperl-l@bioperl.org                  - General discussion
  50   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  51
  52 =head2 Support
  53
  54 Please direct usage questions or support issues to the mailing list:
  55
  56 I<bioperl-l@bioperl.org>
  57
  58 rather than to the module maintainer directly. Many experienced and
  59 responsive experts will be able to look at the problem and quickly
  60 address it. Please include a thorough description of the problem
  61 with code and data examples if at all possible.
  62
  63 =head2 Reporting Bugs
  64
  65 Report bugs to the Bioperl bug tracking system to help us keep track
  66 of the bugs and their resolution. Bug reports can be submitted via the
  67 web:
  68
  69   http://bugzilla.open-bio.org/
  70
  71 =head1 AUTHOR - Torsten Seemann
  72
  73 Email - torsten.seemann AT infotech.monash.edu.au
  74
  75 =head1 CONTRIBUTOR - Bala
  76
  77 Email - savikalpa@fugu-sg.org
  78
  79 =head1 APPENDIX
  80
  81 The rest of the documentation details each of the object methods.
  82 Internal methods are usually preceded with a _
  83
  84 =cut
  85
  86 package Bio::Tools::Seg;
  87 use strict;
  88
  89 use Bio::SeqFeature::Generic;
  90 use base qw(Bio::Root::Root Bio::Root::IO);
  91
  92 =head2 new
  93
  94  Title   : new
  95  Usage   : my $obj = Bio::Tools::Seg->new();
  96  Function: Builds a new Bio::Tools::Seg object
  97  Returns : Bio::Tools::Seg
  98  Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
  99
 100 =cut
 101
 102
 # Constructor. The object is built by the parent-class constructor, and the
 # same argument list is then handed to _initialize_io so that the -fh/-file
 # style arguments (see Bio::Root::IO) can set up the input.
 #
 # Args    : the class name, followed by the caller's constructor arguments
 # Returns : the newly constructed object
 sub new {
     my $class     = shift;
     my @ctor_args = @_;

     my $parser = $class->SUPER::new(@ctor_args);

     # The I/O setup receives exactly what the parent constructor received.
     $parser->_initialize_io(@ctor_args);

     return $parser;
 }
 109
 110 =head2 next_result
 111
 112  Title   : next_result
 113  Usage   : my $feat = $seg->next_result
 114  Function: Get the next result set from parser data
 115  Returns : Bio::SeqFeature::Generic
 116  Args    : none
 117
 118 =cut
 119
 # Read forward through the input and return the next seg region as a
 # feature object.
 #
 # Only FASTA-style header lines are recognised; every other line is
 # skipped. For example, in the header
 #
 #   >test_prot(214-226) complexity=2.26 (12/2.20/2.50)
 #
 # the sequence id is "test_prot", the start is 214, the end is 226 and
 # the score is 2.26. The sequence id is optional in the pattern, so it may
 # be undef in the resulting feature.
 #
 # Args    : none
 # Returns : a Bio::SeqFeature::Generic for the next matching header, or
 #           nothing (undef in scalar context, the empty list in list
 #           context) once _readline returns undef.
 sub next_result {
     my ($self) = @_;

     # Test definedness rather than truth: a final line consisting of just
     # "0" with no trailing newline is false in Perl and must not end the
     # scan early.
     while (defined(my $line = $self->_readline)) {
         next
             unless $line =~ /^\>\s*?(\S+)?\s*?\((\d+)\-(\d+)\)\s*complexity=(\S+)/;

         # Copy the captures into lexicals immediately; $1..$4 are globals
         # that any later successful pattern match would overwrite.
         my ($seq_id, $start, $end, $score) = ($1, $2, $3, $4);

         return Bio::SeqFeature::Generic->new(
             -seq_id     => $seq_id,
             -start      => $start,
             -end        => $end,
             -score      => $score,
             -source_tag => 'Seg',
             -primary    => 'low_complexity'
         );
     }

     # No further header lines: return "nothing" in either context.
     return;
 }
 143
 144
 145 1;