From 0738a7b4b8b35d7470e4e75591845cf06ccca19e Mon Sep 17 00:00:00 2001
From: Brian Osborne
Date: Mon, 21 Sep 2015 10:26:14 -0400
Subject: [PATCH] Add 'desc' to table format

Teach Bio::SeqIO::table a -desc option that names the one-based column
holding the sequence description, and pass it on to the sequence
attributes alongside -display_id, -accession_number and -seq.

Make write_seq() throw, because output in 'table' format is not
implemented.

Add t/data/test-1.tab and t/data/test-1.tab.gb plus 14 tests that cover
reading the description and the write_seq() failure.

---
 Bio/SeqIO/table.pm   | 23 +++++++++++++++++++----
 t/SeqIO/table.t      | 39 ++++++++++++++++++++++++++++++++++++---
 t/data/test-1.tab    |  3 +++
 t/data/test-1.tab.gb | 16 ++++++++++++++++
 4 files changed, 74 insertions(+), 7 deletions(-)
 create mode 100644 t/data/test-1.tab
 create mode 100644 t/data/test-1.tab.gb

diff --git a/Bio/SeqIO/table.pm b/Bio/SeqIO/table.pm
index 33ce7fc2d..64ff8825a 100644
--- a/Bio/SeqIO/table.pm
+++ b/Bio/SeqIO/table.pm
@@ -23,12 +23,11 @@ Bio::SeqIO::table - sequence input/output stream from a delimited table
 
 =head1 SYNOPSIS
 
-  # It is probably best not to use this object directly, but
-  # rather go through the SeqIO handler system. Go:
+  # Do not use this object directly; use Bio::SeqIO, for example:
 
-  $stream = Bio::SeqIO->new(-file => $filename, -format => 'table');
+  $in = Bio::SeqIO->new(-file => $filename, -format => 'table');
 
-  while ( my $seq = $stream->next_seq() ) {
+  while ( my $seq = $in->next_seq() ) {
       # do something with $seq
   }
 
@@ -126,6 +125,8 @@ use base qw(Bio::SeqIO);
                  containing the accession number of the sequence
     -seq         The one-based index of the column containing
                  the sequence string of the sequence
+    -desc        The one-based index of the column containing
+                 the description of the sequence
     -species     The one-based index of the column containing
                  the species for the sequence record; if not a
                  number, will be used as the static species
@@ -178,6 +179,7 @@ sub _initialize {
         $header,
         $delim,
         $display_id,
+        $desc,
         $accnr,
         $seq,
         $taxon,
@@ -188,6 +190,7 @@ sub _initialize {
                               HEADER
                               DELIM
                               DISPLAY_ID
+                              DESC
                               ACCESSION_NUMBER
                               SEQ
                               SPECIES
@@ -209,6 +212,7 @@ sub _initialize {
     $attrs->{-display_id} = $display_id if defined($display_id);
     $attrs->{-accession_number} = $accnr if defined($accnr);
     $attrs->{-seq} = $seq if defined($seq);
+    $attrs->{-desc} = $desc if defined($desc);
     if (defined($taxon)) {
         if (ref($taxon) || ($taxon =~ /^\d+$/)) {
             # either a static object, or a column reference
@@ -582,6 +586,17 @@ sub trim_values{
     return $self->{'trim_values'};
 }
 
+=head2 write_seq
+
+ Title: write_seq
+ Usage: write_seq() is not implemented for table format output.
+
+=cut
+
+sub write_seq {
+    shift->throw("write_seq() not implemented for 'table' format");
+}
+
 =head1 Internal methods
 
 All methods with a leading underscore are not meant to be part of the
diff --git a/t/SeqIO/table.t b/t/SeqIO/table.t
index 0cb17fa90..5a15eec12 100644
--- a/t/SeqIO/table.t
+++ b/t/SeqIO/table.t
@@ -1,5 +1,4 @@
 # -*-Perl-*- Test Harness script for Bioperl
-# $Id$
 
 use strict;
 
@@ -7,7 +6,7 @@ BEGIN {
     use lib '.';
     use Bio::Root::Test;
 
-    test_begin(-tests => 450,
+    test_begin(-tests => 464,
                -requires_module => 'IO::Scalar');
 
     use_ok('Bio::Tools::CodonTable');
@@ -78,9 +77,43 @@ ok $seqin = Bio::SeqIO->new(-file => test_input_file("test.tsv"),
                             -trim => 1);
 run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);
 
+# Tests to check that 'description' is read from 'table' format
+ok $seqin = Bio::SeqIO->new(
+    -file => test_input_file("test-1.tab"),
+    -format => 'table',
+    -header => 1,
+    -display_id => 1,
+    -accession_number => 1,
+    -seq => 3,
+    -desc => 2
+);
+ok($seqin);
+my $seq = $seqin->next_seq;
+ok($seq);
+is( $seq->desc, 'd1');
+is( $seq->display_id, 'n1');
+is( $seq->seq, 'aaaa');
+$seq = $seqin->next_seq;
+ok($seq);
+is( $seq->desc, 'd2');
+is( $seq->display_id, 'n2');
+is( $seq->seq, 'tttt');
+
 $seqin->close();
 
-# need Spreadsheet::ParseExcel installed for testing Excel format
+# Tests to check that we can _not_ write to 'table' format
+ok $seqin = Bio::SeqIO->new(
+    -file => test_input_file("test-1.tab.gb"),
+    -format => 'genbank'
+);
+ok($seqin);
+$seq = $seqin->next_seq;
+ok($seq);
+my $tmpfile = test_output_file();
+my $seqout = Bio::SeqIO->new( -format => 'table', -file => ">$tmpfile" );
+dies_ok { $seqout->write_seq($seq) } "write_seq() not implemented";
+
+# Need Spreadsheet::ParseExcel installed for testing Excel format
 SKIP: {
     test_skip(-tests => 112, -requires_module => 'Spreadsheet::ParseExcel');
 
diff --git a/t/data/test-1.tab b/t/data/test-1.tab
new file mode 100644
index 000000000..2830d331a
--- /dev/null
+++ b/t/data/test-1.tab
@@ -0,0 +1,3 @@
+N	D	S
+n1	d1	aaaa
+n2	d2	tttt
diff --git a/t/data/test-1.tab.gb b/t/data/test-1.tab.gb
new file mode 100644
index 000000000..aa2bfcfdc
--- /dev/null
+++ b/t/data/test-1.tab.gb
@@ -0,0 +1,16 @@
+LOCUS       n1                         4 bp    dna     linear   UNK
+DEFINITION  d1
+ACCESSION   n1
+KEYWORDS    .
+FEATURES             Location/Qualifiers
+ORIGIN
+        1 aaaa
+//
+LOCUS       n2                         4 bp    dna     linear   UNK
+DEFINITION  d2
+ACCESSION   n2
+KEYWORDS    .
+FEATURES             Location/Qualifiers
+ORIGIN
+        1 tttt
+//
-- 
2.11.4.GIT