* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / FeatureIO / vecscreen_simple.pm
blob9d75cc169d94fc02ee74d81bd0db296436cd3030
1 =pod
3 =head1 NAME
5 Bio::FeatureIO::vecscreen_simple - read/write features from NCBI vecscreen -f 3
6 output
8 =head1 SYNOPSIS
10 # read features
11 my $fin = Bio::FeatureIO->new(-file=>'vecscreen.out',
12 -format=>'vecscreen_simple');
13 my @vec_regions;
14 while (my $f = $fin->next_feature) {
15 push @vec_regions, $f;
16 }
18 # write features NOT IMPLEMENTED
20 =head1 DESCRIPTION
22 vecscreen is a system for quickly identifying segments of a nucleic
23 acid sequence that may be of vector origin. NCBI developed vecscreen
24 to minimize the incidence and impact of vector contamination in public
25 sequence databases. GenBank Annotation Staff use vecscreen to verify
26 that sequences submitted for inclusion in the database are free from
27 contaminating vector sequence. Any sequence can be screened for vector
28 contamination using vecscreen.
30 This module provides parsing for vecscreen '-f 3' output, described in
31 the vecscreen documentation as 'Text list, no alignments'.
33 =head1 FEEDBACK
35 =head2 Mailing Lists
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 =head2 Reporting Bugs
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 of the bugs and their resolution. Bug reports can be submitted via
48 the web:
50 http://bugzilla.open-bio.org/
52 =head1 AUTHOR - Robert Buels
54 Email rmb32 AT cornell.edu
56 =head1 CONTRIBUTORS
58 Based on ptt.pm by Torsten Seemann
60 =head1 APPENDIX
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
65 =cut
67 # Let the code begin...
69 package Bio::FeatureIO::vecscreen_simple;
71 require 5.006_001;
73 use strict;
74 use base qw(Bio::FeatureIO);
75 use Bio::SeqFeature::Generic;
=head2 _initialize

 Title   : _initialize
 Function: Reading: resets the parser state (current sequence name and
           current match type) to empty strings.
           Writing: throws an exception, since writing vecscreen_simple
           features is not implemented.
 Args    : the named arguments given to the constructor; they are passed
           on unchanged to the parent class initializer

=cut

sub _initialize {
    my ( $self, %arg ) = @_;

    $self->SUPER::_initialize(%arg);

    # Only read mode is supported; any other mode is rejected up front.
    $self->throw('vecscreen_simple feature writing not implemented')
        unless $self->mode eq 'r';

    # Start with no sequence and no match type seen yet.
    $self->{parse_state}->{seqname}   = '';
    $self->{parse_state}->{matchtype} = '';
}
=head2 next_feature

 Title   : next_feature
 Usage   : $io->next_feature()
 Function: read the next feature from the vecscreen output file
 Args    : none
 Returns : Bio::SeqFeatureI object; nothing when the input is exhausted
           or when the stream is not in read mode
 Throws  : if a non-blank line is seen before any '>Vector' line, or if
           a line inside a record is neither a match type, a coordinate
           pair, nor 'no hits found'

=cut

sub next_feature {
    my $self = shift;

    # Features can only be read from a stream opened in read mode.
    return unless $self->mode eq 'r';

    my $state = $self->{parse_state} ||= {};

    # Test for definedness, not truth: a last line consisting of just "0"
    # without a trailing newline is false and would otherwise end the
    # loop silently instead of being parsed (and reported).
    while ( defined( my $line = $self->_readline() ) ) {
        chomp $line;

        if ( $line =~ /^>Vector (\S+)/ ) {
            # start of the output for a new query sequence
            $state->{seqname} = $1;
        }
        elsif ( $line =~ /^\s*WARNING/ ) {
            $self->warn("$state->{seqname}: vecscreen says: $line\n");
        }
        elsif ( $line =~ /\S/ ) {
            $state->{seqname}
                or $self->throw("Unexpected line in vecscreen output '$line'");

            # If it's not a vector line, it should be either a match type
            # or a coordinates line.  Classify it on a trimmed copy so
            # that stray leading/trailing whitespace does not cause a
            # spurious parse error; $line itself is kept intact for the
            # error message.
            ( my $content = $line ) =~ s/^\s+|\s+$//g;
            my $lcline = lc $content;

            if ( $content =~ /^(\d+)\s+(\d+)$/ ) {
                my ( $start, $end ) = ( $1, $2 );

                # replace whitespace with underscores for the primary tag
                ( my $matchtype = $state->{matchtype} ) =~ s/\s/_/g;

                return Bio::SeqFeature::Generic->new(
                    -start   => $start,
                    -end     => $end,
                    -primary => $matchtype,
                    -seq_id  => $state->{seqname},
                );
            }
            elsif ( $lcline eq 'no hits found' ) {
                $state->{seqname} = '';
            }
            elsif ( grep { $lcline eq $_ }
                'strong match', 'moderate match', 'weak match', 'suspect origin' )
            {
                $state->{matchtype} = $lcline;
            }
            else {
                $self->throw("Parse error. Expected a match type or coordinate line but got '$line'");
            }
        }
        else {
            # A blank (whitespace-only) line marks the boundary between
            # the output for different sequences: reset the parser.
            $state->{seqname}   = '';
            $state->{matchtype} = '';
        }
    }

    return;
}
=head2 write_feature (NOT IMPLEMENTED)

 Title   : write_feature
 Usage   : $io->write_feature($feature)
 Function: would write a Bio::SeqFeatureI object in vecscreen -f 3
           format; currently it only calls throw_not_implemented
 Example :
 Args    : Bio::SeqFeatureI object
 Returns :

=cut

sub write_feature {
    my $self = shift;

    # Writing is unsupported; delegate straight to throw_not_implemented.
    return $self->throw_not_implemented;
}
176 1;#do not remove