5 Bio::FeatureIO::vecscreen_simple - read features from NCBI vecscreen -f 3 output (writing is not implemented)
11 my $fin = Bio::FeatureIO->new(-file=>'vecscreen.out',
12 -format=>'vecscreen_simple');
14 while (my $f = $fin->next_feature) {
15 push @vec_regions, $f;
18 # write features NOT IMPLEMENTED
22 vecscreen is a system for quickly identifying segments of a nucleic
23 acid sequence that may be of vector origin. NCBI developed vecscreen
24 to minimize the incidence and impact of vector contamination in public
25 sequence databases. GenBank Annotation Staff use vecscreen to verify
26 that sequences submitted for inclusion in the database are free from
27 contaminating vector sequence. Any sequence can be screened for vector
28 contamination using vecscreen.
30 This module provides parsing for vecscreen '-f 3' output, described in
31 the vecscreen documentation as 'Text list, no alignments'
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 of the bugs and their resolution. Bug reports can be submitted via
50 http://bugzilla.open-bio.org/
52 =head1 AUTHOR - Robert Buels
54 Email rmb32 AT cornell.edu
58 Based on ptt.pm by Torsten Seemann
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
67 # Let the code begin...
69 package Bio
::FeatureIO
::vecscreen_simple
;
74 use base
qw(Bio::FeatureIO);
75 use Bio
::SeqFeature
::Generic
;
80 Function: Reading? resets the parser state (current sequence name and match type)
# Let the parent class perform its own initialization with the caller's arguments.
88 $self->SUPER::_initialize
(%arg);
# In read mode, start with an empty sequence name and an empty match type.
90 if ($self->mode eq 'r') {
91 $self->{parse_state
}->{seqname
} = '';
92 $self->{parse_state
}->{matchtype
} = '';
# Report that feature writing is unsupported by throwing an exception.
# NOTE(review): the branch that guards this throw is not visible in this
# excerpt; presumably it runs only when the mode is not 'r' -- confirm.
95 $self->throw('vecscreen_simple feature writing not implemented');
102 Usage : $io->next_feature()
103 Function: read the next feature from the vecscreen output file
105 Returns : Bio::SeqFeatureI object
# Features can only be produced in read mode; in any other mode return nothing.
111 return unless $self->mode eq 'r'; # nothing to read unless the stream is in read mode
# Consume input one line at a time until a feature can be returned or
# _readline() yields a false value.
113 while ( my $line = $self->_readline() ) {
# A line beginning with '>Vector ' opens a record: remember the token that
# follows it as the current sequence name.
115 if ( $line =~ /^>Vector (\S+)/ ) {
116 $self->{parse_state
}{seqname
} = $1;
# WARNING lines are passed on through warn(), prefixed with the current
# sequence name.
117 } elsif ( $line =~ /^\s*WARNING/ ) {
118 $self->warn("$self->{parse_state}{seqname}: vecscreen says: $line\n");
# Any other line containing non-whitespace is only acceptable once a
# sequence name has been recorded; otherwise throw.
119 } elsif ( $line =~ /\S/ ) {
121 $self->{parse_state
}{seqname
}
122 or $self->throw("Unexpected line in vecscreen output '$line'");
124 #if it's not a vector line, it should be either a match type or a coordinate line
# Lower-cased copy of the line, used for the keyword comparisons below.
126 my $lcline = lc $line;
# Two whitespace-separated integers form a coordinate line for a match region.
128 if ( $line =~ /^(\d+)\s+(\d+)\s*$/ ) {
# $s is passed as -start below. NOTE(review): the use of $e is not visible
# in this excerpt; presumably it supplies the end coordinate -- confirm.
129 my ($s,$e) = ($1,$2);
131 my $matchtype = $self->{parse_state
}{matchtype
};
132 $matchtype =~ s/\s/_/g; #replace whitespace with underscores for the primary tag
# Return a generic feature for this region, tagged with the current match
# type and the current sequence name.
133 return Bio
::SeqFeature
::Generic
->new( -start
=> $s,
135 -primary
=> $matchtype,
136 -seq_id
=> $self->{parse_state
}{seqname
},
# 'no hits found' clears the current sequence name.
138 } elsif ( $lcline eq 'no hits found' ) {
139 $self->{parse_state
}{seqname
} = '';
# One of the four recognised match-type headings: store it as the current
# match type, which the coordinate branch above reads.
140 } elsif ( grep $lcline eq $_, 'strong match', 'moderate match', 'weak match', 'suspect origin') {
141 $self->{parse_state
}{matchtype
} = $lcline;
# Raise a parse error quoting the offending line.
# NOTE(review): the branch introducing this throw is not visible in this
# excerpt; presumably it is the final else of the chain above -- confirm.
143 $self->throw("Parse error. Expected a match type or coordinate line but got '$line'");
146 #blank line, ignore it and reset parser
148 $self->{parse_state
}{seqname
} = ''; #< a line with whitespace
149 #indicates a boundary
# Also clear the current match type.
152 $self->{parse_state
}{matchtype
} = '';
159 =head2 write_feature (NOT IMPLEMENTED)
161 Title : write_feature
162 Usage : $io->write_feature($feature)
163 Function: write a Bio::SeqFeatureI object in vecscreen -f 3 format
165 Args : Bio::SeqFeatureI object
# Writing is not implemented: call throw_not_implemented on the invocant
# (the first argument taken off the argument list).
171 shift->throw_not_implemented;