Bio/SeqIO/table.pm

   1 #
   2 # BioPerl module for Bio::SeqIO::table
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Hilmar Lapp
   7 #
   8 # You may distribute this module under the same terms as perl itself.
   9 # Refer to the Perl Artistic License (see the license accompanying this
  10 # software package, or see http://www.perl.com/language/misc/Artistic.html)
  11 # for the terms under which you may use, modify, and redistribute this module.
  12 #
  13 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
  14 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  15 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  16 #
  17
  18 # POD documentation - main docs before the code
  19
  20 =head1 NAME
  21
  22 Bio::SeqIO::table - sequence input/output stream from a delimited table
  23
  24 =head1 SYNOPSIS
  25
  26   # Do not to use this object directly, use Bio::SeqIO, for example:
  27
  28   $in = Bio::SeqIO->new(-file => $filename, -format => 'table');
  29
  30   while ( my $seq = $in->next_seq() ) {
  31         # do something with $seq
  32   }
  33
  34 =head1 DESCRIPTION
  35
  36 This class transforms records in a table-formatted text file into
  37 Bio::Seq objects.
  38
  39 A table-formatted text file of sequence records for the purposes of
  40 this module is defined as a text file with each row corresponding to a
  41 sequence, and the attributes of the sequence being in different
  42 columns. Columns are delimited by a common delimiter, for instance tab
  43 or comma.
  44
  45 The module permits specifying which columns hold which type of
  46 annotation. The semantics of certain attributes, if present, are
  47 pre-defined, e.g., accession number and sequence. Additional
  48 attributes may be added to the annotation bundle.
  49
  50 =head1 FEEDBACK
  51
  52 =head2 Mailing Lists
  53
  54 User feedback is an integral part of the evolution of this and other
  55 Bioperl modules. Send your comments and suggestions preferably to one
  56 of the Bioperl mailing lists.  Your participation is much appreciated.
  57
  58   bioperl-l@bioperl.org                  - General discussion
  59   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  60
  61 =head2 Support
  62
  63 Please direct usage questions or support issues to the mailing list:
  64
  65 I<bioperl-l@bioperl.org>
  66
  67 rather than to the module maintainer directly. Many experienced and
  68 reponsive experts will be able look at the problem and quickly
  69 address it. Please include a thorough description of the problem
  70 with code and data examples if at all possible.
  71
  72 =head2 Reporting Bugs
  73
  74 Report bugs to the Bioperl bug tracking system to help us keep track
  75 the bugs and their resolution.
  76
  77 Bug reports can be submitted via email or the web:
  78
  79   https://github.com/bioperl/bioperl-live/issues
  80
  81 =head1 AUTHOR - Hilmar Lapp
  82
  83 Email hlapp at gmx.net
  84
  85 =head1 APPENDIX
  86
  87 The rest of the documentation details each of the object
  88 methods. Internal methods are usually preceded with a _
  89
  90 =cut
  91
  92 # Let the code begin...
  93
  94 package Bio::SeqIO::table;
  95 use strict;
  96
  97 use Bio::Species;
  98 use Bio::Seq::SeqFactory;
  99 use Bio::Annotation::Collection;
 100 use Bio::Annotation::SimpleValue;
 101
 102 use base qw(Bio::SeqIO);
 103
 104 =head2 new
 105
 106  Title   : new
 107  Usage   : $stream = Bio::SeqIO->new(-file => $filename, -format => 'table')
 108  Function: Returns a new seqstream
 109  Returns : A Bio::SeqIO stream for a table format
 110  Args    : Named parameters:
 111
 112  -file              Name of file to read
 113  -fh                Filehandle to attach to
 114  -comment           Leading character(s) introducing a comment line
 115  -header            the number of header lines to skip; the first
 116                     non-comment header line will be used to obtain
 117                     column names; column names will be used as the
 118                     default tags for attaching annotation.
 119  -delim             The delimiter for columns as a regular expression;
 120                     consecutive occurrences of the delimiter will
 121                     not be collapsed.
 122  -display_id        The one-based index of the column containing
 123                     the display ID of the sequence
 124  -accession_number  The one-based index of the column
 125                     containing the accession number of the sequence
 126  -seq               The one-based index of the column containing
 127                     the sequence string of the sequence
 128  -desc              The one-based index of the column containing
 129                     the description of the sequence
 130  -species           The one-based index of the column containing the
 131                     species for the sequence record; if not a
 132                     number, will be used as the static species
 133                     common to all records
 134  -annotation        If provided and a scalar (but see below), a
 135                     flag whether or not all additional columns are
 136                     to be preserved as annotation, the tags used
 137                     will either be 'colX' if there is no column
 138                     header and where X is the one-based column
 139                     index, and otherwise the column headers will be
 140                     used as tags;
 141
 142                     If a reference to an array, or a square
 143                     bracket-enclosed string of comma-delimited
 144                     values, only those columns (one-based index)
 145                     will be preserved as annotation, tags as before;
 146
 147                     If a reference to a hash, or a curly
 148                     braces-enclosed string of comma-delimited key
 149                     and value pairs in alternating order, the keys
 150                     are one-based column indexes to be preserved,
 151                     and the values are the tags under which the
 152                     annotation is to be attached; if not provided or
 153                     supplied as undef, no additional annotation will
 154                     be preserved.
 155  -colnames          A reference to an array of column labels, or a
 156                     string of comma-delimited labels, denoting the
 157                     columns to be converted into annotation; this is
 158                     an alternative to -annotation and will be
 159                     ignored if -annotation is also supplied with a
 160                     valid value.
 161  -trim              Flag determining whether or not all values should
 162                     be trimmed of leading and trailing white space
 163                     and double quotes
 164
 165  Additional arguments may be used to e.g. set factories and
 166  builders involved in the sequence object creation (see the
 167  POD of Bio::SeqIO).
 168
 169 =cut
 170
 171 sub _initialize {
 172     my($self,@args) = @_;
 173
 174     # chained initialization
 175     $self->SUPER::_initialize(@args);
 176
 177     # our own parameters
 178     my ($cmtchars,
 179         $header,
 180         $delim,
 181         $display_id,
 182         $desc,
 183         $accnr,
 184         $seq,
 185         $taxon,
 186         $useann,
 187         $colnames,
 188         $trim) =
 189             $self->_rearrange([qw(COMMENT
 190                                   HEADER
 191                                   DELIM
 192                                   DISPLAY_ID
 193                                   DESC
 194                                   ACCESSION_NUMBER
 195                                   SEQ
 196                                   SPECIES
 197                                   ANNOTATION
 198                                   COLNAMES
 199                                   TRIM)
 200                               ], @args);
 201
 202     # store options and apply defaults
 203     $self->comment_char(defined($cmtchars) ? $cmtchars : "#")
 204         if (!defined($self->comment_char)) || defined($cmtchars);
 205     $self->delimiter(defined($delim) ? $delim : "\t")
 206         if (!defined($self->delimiter)) || defined($delim);
 207     $self->header($header) if defined($header);
 208     $self->trim_values($trim) if defined($trim);
 209
 210     # attribute columns
 211     my $attrs = {};
 212     $attrs->{-display_id} = $display_id if defined($display_id);
 213     $attrs->{-accession_number} = $accnr if defined($accnr);
 214     $attrs->{-seq} = $seq if defined($seq);
 215     $attrs->{-desc} = $desc if defined($desc);
 216     if (defined($taxon)) {
 217         if (ref($taxon) || ($taxon =~ /^\d+$/)) {
 218             # either a static object, or a column reference
 219             $attrs->{-species} = $taxon;
 220         } else {
 221             # static species as a string
 222             $attrs->{-species} = Bio::Species->new(
 223                 -classification => [reverse(split(' ',$taxon))]);
 224         }
 225     }
 226     $self->attribute_map($attrs);
 227
 228     # annotation columns, if any
 229     if ($useann && !ref($useann)) {
 230         # it's a scalar; check whether this is in fact an array or
 231         # hash as a string rather than just a flag
 232         if ($useann =~ /^\[(.*)\]$/) {
 233             $useann = [split(/[,;]/,$1)];
 234         } elsif ($useann =~ /^{(.*)}$/) {
 235             $useann = {split(/[,;]/,$1)};
 236         } # else it is probably indeed just a flag
 237     }
 238     if (ref($useann)) {
 239         my $ann_map;
 240         if (ref($useann) eq "ARRAY") {
 241             my $has_header = ($self->header > 0);
 242             $ann_map = {};
 243             foreach my $i (@$useann) {
 244                 $ann_map->{$i} = $has_header ? undef : "col$i";
 245             }
 246         } else {
 247             # no special handling necessary
 248             $ann_map = $useann;
 249         }
 250         $self->annotation_map($ann_map);
 251     } else {
 252         $self->keep_annotation($useann || $colnames);
 253         # annotation columns, if any
 254         if ($colnames && !ref($colnames)) {
 255             # an array as a string
 256             $colnames =~ s/^\[(.*)\]$/$1/;
 257             $colnames = [split(/[,;]/,$colnames)];
 258         }
 259         $self->annotation_columns($colnames) if ref($colnames);
 260     }
 261
 262     # make sure we have a factory defined
 263     if(!defined($self->sequence_factory)) {
 264         $self->sequence_factory(
 265             Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
 266                                       -type => 'Bio::Seq::RichSeq'));
 267     }
 268 }
 269
 270 =head2 next_seq
 271
 272  Title   : next_seq
 273  Usage   : $seq = $stream->next_seq()
 274  Function: returns the next sequence in the stream
 275  Returns : Bio::Seq::RichSeq object
 276  Args    :
 277
 278 =cut
 279
 280 sub next_seq {
 281     my $self = shift;
 282
 283     # skip until not a comment and not an empty line
 284     my $line_ok = $self->_next_record();
 285
 286     # if there is a header but we haven't read past it yet then do so now
 287     if ($line_ok && (! $self->_header_skipped) && $self->header) {
 288         $line_ok = $self->_parse_header();
 289         $self->_header_skipped(1);
 290     }
 291
 292     # return if we reached end-of-file
 293     return unless $line_ok;
 294
 295     # otherwise, parse the record
 296
 297     # split into columns
 298     my @cols = $self->_get_row_values();
 299     # trim leading and trailing whitespace and quotes if desired
 300     if ($self->trim_values) {
 301         for(my $i = 0; $i < scalar(@cols); $i++) {
 302             if ($cols[$i]) {
 303                 # trim off whitespace
 304                 $cols[$i] =~ s/^\s+//;
 305                 $cols[$i] =~ s/\s+$//;
 306                 # trim off double quotes
 307                 $cols[$i] =~ s/^"//;
 308                 $cols[$i] =~ s/"$//;
 309             }
 310         }
 311     }
 312
 313     # assign values for columns in the attribute map
 314     my $attrmap = $self->_attribute_map;
 315     my %params = ();
 316     foreach my $attr (keys %$attrmap) {
 317         if ((!ref($attrmap->{$attr})) && ($attrmap->{$attr} =~ /^\d+$/)) {
 318             # this is a column index, add to instantiation parameters
 319             $params{$attr} = $cols[$attrmap->{$attr}];
 320         } else {
 321             # not a column index; we assume it's a static value
 322             $params{$attr} = $attrmap->{$attr};
 323         }
 324     }
 325
 326     # add annotation columns to the annotation bundle
 327     my $annmap = $self->_annotation_map;
 328     if ($annmap && %$annmap) {
 329         my $anncoll = Bio::Annotation::Collection->new();
 330         foreach my $col (keys %$annmap) {
 331             next unless $cols[$col]; # skip empty columns!
 332             $anncoll->add_Annotation(
 333                 Bio::Annotation::SimpleValue->new(-value  => $cols[$col],
 334                                                   -tagname=> $annmap->{$col}));
 335         }
 336         $params{'-annotation'} = $anncoll;
 337     }
 338
 339     # ask the object builder to add the slots that we've gathered
 340     my $builder = $self->sequence_builder();
 341     $builder->add_slot_value(%params);
 342     # and instantiate the object
 343     my $seq = $builder->make_object();
 344
 345     # done!
 346     return $seq;
 347 }
 348
 349 =head2 comment_char
 350
 351  Title   : comment_char
 352  Usage   : $obj->comment_char($newval)
 353  Function: Get/set the leading character(s) designating a line as
 354            a comment-line.
 355  Example :
 356  Returns : value of comment_char (a scalar)
 357  Args    : on set, new value (a scalar or undef, optional)
 358
 359
 360 =cut
 361
 362 sub comment_char{
 363     my $self = shift;
 364
 365     return $self->{'comment_char'} = shift if @_;
 366     return $self->{'comment_char'};
 367 }
 368
 369 =head2 header
 370
 371  Title   : header
 372  Usage   : $obj->header($newval)
 373  Function: Get/set the number of header lines to skip before the
 374            rows containing actual sequence records.
 375
 376            If set to zero or undef, means that there is no header and
 377            therefore also no column headers.
 378
 379  Example :
 380  Returns : value of header (a scalar)
 381  Args    : on set, new value (a scalar or undef, optional)
 382
 383
 384 =cut
 385
 386 sub header{
 387     my $self = shift;
 388
 389     return $self->{'header'} = shift if @_;
 390     return $self->{'header'};
 391 }
 392
 393 =head2 delimiter
 394
 395  Title   : delimiter
 396  Usage   : $obj->delimiter($newval)
 397  Function: Get/set the column delimiter. This will in fact be
 398            treated as a regular expression. Consecutive occurrences
 399            will not be collapsed to a single one.
 400
 401  Example :
 402  Returns : value of delimiter (a scalar)
 403  Args    : on set, new value (a scalar or undef, optional)
 404
 405
 406 =cut
 407
 408 sub delimiter{
 409     my $self = shift;
 410
 411     return $self->{'delimiter'} = shift if @_;
 412     return $self->{'delimiter'};
 413 }
 414
 415 =head2 attribute_map
 416
 417  Title   : attribute_map
 418  Usage   : $obj->attribute_map($newval)
 419  Function: Get/set the map of sequence object initialization
 420            attributes (keys) to one-based column index.
 421
 422            Attributes will usually need to be prefixed by a dash, just
 423            as if they were passed to the new() method of the sequence
 424            class.
 425
 426  Example :
 427  Returns : value of attribute_map (a reference to a hash)
 428  Args    : on set, new value (a reference to a hash or undef, optional)
 429
 430
 431 =cut
 432
 433 sub attribute_map{
 434     my $self = shift;
 435
 436     # internally we store zero-based maps - so we need to convert back
 437     # and forth here
 438     if (@_) {
 439         my $arg = shift;
 440         # allow for and protect against undef
 441         return delete $self->{'_attribute_map'} unless defined($arg);
 442         # copy to avoid side-effects
 443         my $attr_map = {%$arg};
 444         foreach my $key (keys %$attr_map) {
 445             if ((!ref($attr_map->{$key})) && ($attr_map->{$key} =~ /^\d+$/)) {
 446                 $attr_map->{$key}--;
 447             }
 448         }
 449         $self->{'_attribute_map'} = $attr_map;
 450     }
 451     # there may not be a map
 452     return unless exists($self->{'_attribute_map'});
 453     # we need to copy in order not to override the stored map!
 454     my %attr_map = %{$self->{'_attribute_map'}};
 455     foreach my $key (keys %attr_map) {
 456         if ((!ref($attr_map{$key})) && ($attr_map{$key} =~ /^\d+$/)) {
 457             $attr_map{$key}++;
 458         }
 459     }
 460     return \%attr_map;
 461 }
 462
 463 =head2 annotation_map
 464
 465  Title   : annotation_map
 466  Usage   : $obj->annotation_map($newval)
 467  Function: Get/set the mapping between one-based column indexes
 468            (keys) and annotation tags (values).
 469
 470            Note that the map returned by this method may change after
 471            the first next_seq() call if the file contains a column
 472            header and no annotation keys have been predefined in the
 473            map, because upon reading the column header line the tag
 474            names will be set automatically.
 475
 476            Note also that the map may reference columns that are used
 477            as well in the sequence attribute map.
 478
 479  Example :
 480  Returns : value of annotation_map (a reference to a hash)
 481  Args    : on set, new value (a reference to a hash or undef, optional)
 482
 483
 484 =cut
 485
 486 sub annotation_map{
 487     my $self = shift;
 488
 489     # internally we store zero-based maps - so we need to convert back
 490     # and forth here
 491     if (@_) {
 492         my $arg = shift;
 493         # allow for and protect against undef
 494         return delete $self->{'_annotation_map'} unless defined($arg);
 495         # copy to avoid side-effects
 496         my $ann_map = {%$arg};
 497         # make sure we sort the keys numerically or otherwise we may
 498         # clobber a key with a higher index
 499         foreach my $key (sort { $a <=> $b } keys(%$ann_map)) {
 500             $ann_map->{$key-1} = $ann_map->{$key};
 501             delete $ann_map->{$key};
 502         }
 503         $self->{'_annotation_map'} = $ann_map;
 504         # also make a note that we want to keep annotation
 505         $self->keep_annotation(1);
 506     }
 507     # there may not be a map
 508     return unless exists($self->{'_annotation_map'});
 509     # we need to copy in order not to override the stored map!
 510     my %ann_map = %{$self->{'_annotation_map'}};
 511     # here we need to sort numerically in reverse order ...
 512     foreach my $key (sort { $b <=> $a } keys(%ann_map)) {
 513         $ann_map{$key+1} = $ann_map{$key};
 514         delete $ann_map{$key};
 515     }
 516     return \%ann_map;
 517 }
 518
 519 =head2 keep_annotation
 520
 521  Title   : keep_annotation
 522  Usage   : $obj->keep_annotation($newval)
 523  Function: Get/set flag whether or not to keep values from
 524            additional columns as annotation.
 525
 526            Additional columns are all those columns in the input file
 527            that aren't referenced in the attribute map.
 528
 529  Example :
 530  Returns : value of keep_annotation (a scalar)
 531  Args    : on set, new value (a scalar or undef, optional)
 532
 533
 534 =cut
 535
 536 sub keep_annotation{
 537     my $self = shift;
 538
 539     return $self->{'keep_annotation'} = shift if @_;
 540     return $self->{'keep_annotation'};
 541 }
 542
 543 =head2 annotation_columns
 544
 545  Title   : annotation_columns
 546  Usage   : $obj->annotation_columns($newval)
 547  Function: Get/set the names (labels) of the columns to be used for
 548            annotation.
 549
 550            This is an alternative to using annotation_map. In order to
 551            have any effect, it must be set before the first call of
 552            next_seq(), and obviously there must be a header line (or
 553            row) too giving the column labels.
 554
 555  Example :
 556  Returns : value of annotation_columns (a reference to an array)
 557  Args    : on set, new value (a reference to an array of undef, optional)
 558
 559
 560 =cut
 561
 562 sub annotation_columns{
 563     my $self = shift;
 564
 565     return $self->{'annotation_columns'} = shift if @_;
 566     return $self->{'annotation_columns'};
 567 }
 568
 569 =head2 trim_values
 570
 571  Title   : trim_values
 572  Usage   : $obj->trim_values($newval)
 573  Function: Get/set whether or not to trim leading and trailing
 574            whitespace off all column values.
 575  Example :
 576  Returns : value of trim_values (a scalar)
 577  Args    : on set, new value (a scalar or undef, optional)
 578
 579
 580 =cut
 581
 582 sub trim_values{
 583     my $self = shift;
 584
 585     return $self->{'trim_values'} = shift if @_;
 586     return $self->{'trim_values'};
 587 }
 588
 589 =head2 write_seq
 590
 591  Title: write_seq
 592  Usage: write_seq() is not implemented for table format output.
 593
 594 =cut
 595
 596 sub write_seq {
 597     shift->throw("write_seq() not implemented for 'table' format");
 598 }
 599
 600 =head1 Internal methods
 601
 602 All methods with a leading underscore are not meant to be part of the
 603 'official' API. They are for use by this module only, consider them
 604 private unless you are a developer trying to modify this module.
 605
 606 =cut
 607
 608 =head2 _attribute_map
 609
 610  Title   : _attribute_map
 611  Usage   : $obj->_attribute_map($newval)
 612  Function: Get only. Same as attribute_map, but zero-based indexes.
 613
 614            Note that any changes made to the returned map will change
 615            the map used by this instance. You should know what you are
 616            doing if you modify the returned value (or if you call this
 617            method in the first place).
 618
 619  Example :
 620  Returns : value of _attribute_map (a reference to a hash)
 621  Args    : none
 622
 623
 624 =cut
 625
 626 sub _attribute_map{
 627     my $self = shift;
 628
 629     return $self->{'_attribute_map'};
 630 }
 631
 632 =head2 _annotation_map
 633
 634  Title   : _annotation_map
 635  Usage   : $obj->_annotation_map($newval)
 636  Function: Get only. Same as annotation_map, but with zero-based indexes.
 637
 638            Note that any changes made to the returned map will change
 639            the map used by this instance. You should know what you are
 640            doing if you modify the returned value (or if you call this
 641            method in the first place).
 642
 643  Example :
 644  Returns : value of _annotation_map (a reference to a hash)
 645  Args    : none
 646
 647
 648 =cut
 649
 650 sub _annotation_map{
 651     my $self = shift;
 652
 653     return $self->{'_annotation_map'};
 654 }
 655
 656 =head2 _header_skipped
 657
 658  Title   : _header_skipped
 659  Usage   : $obj->_header_skipped($newval)
 660  Function: Get/set the flag whether the header was already
 661            read (and skipped) or not.
 662  Example :
 663  Returns : value of _header_skipped (a scalar)
 664  Args    : on set, new value (a scalar or undef, optional)
 665
 666
 667 =cut
 668
 669 sub _header_skipped{
 670     my $self = shift;
 671
 672     return $self->{'_header_skipped'} = shift if @_;
 673     return $self->{'_header_skipped'};
 674 }
 675
 676 =head2 _next_record
 677
 678  Title   : _next_record
 679  Usage   :
 680  Function: Navigates the underlying file to the next record.
 681
 682            For row-based records in delimited text files, this will
 683            skip all empty lines and lines with a leading comment
 684            character.
 685
 686            This method is here is to serve as a hook for other formats
 687            that conceptually also represent tables but aren't
 688            formatted as row-based text files.
 689
 690  Example :
 691  Returns : TRUE if the navigation was successful and FALSE
 692            otherwise. Unsuccessful navigation will usually be treated
 693            as an end-of-file condition.
 694  Args    :
 695
 696
 697 =cut
 698
 699 sub _next_record{
 700     my $self = shift;
 701
 702     my $cmtcc = $self->comment_char;
 703     my $line = $self->_readline();
 704
 705     # skip until not a comment and not an empty line
 706     while (defined($line)
 707            && (($cmtcc && ($line =~ /^\s*$cmtcc/))
 708                || ($line =~ /^\s*$/))) {
 709         $line = $self->_readline();
 710     }
 711
 712     return $self->{'_line'} = $line;
 713 }
 714
 715 =head2 _parse_header
 716
 717  Title   : _parse_header
 718  Usage   :
 719  Function: Parse the table header and navigate past it.
 720
 721            This method is called if the number of header rows has been
 722            specified equal to or greater than one, and positioned at
 723            the first header line (row). By default the first header
 724            line (row) is used for setting column names, but additional
 725            lines (rows) may be skipped too. Empty lines and comment
 726            lines do not count as header lines (rows).
 727
 728            This method will call _next_record() to navigate to the
 729            next header line (row), if there is more than one header
 730            line (row). Upon return, the file is presumed to be
 731            positioned at the first record after the header.
 732
 733            This method is here is to serve as a hook for other formats
 734            that conceptually also represent tables but aren't
 735            formatted as row-based text files.
 736
 737            Note however that the only methods used to access file
 738            content or navigate the position are _get_row_values() and
 739            _next_record(), so it should usually suffice to override
 740            those.
 741
 742  Example :
 743  Returns : TRUE if navigation past the header was successful and FALSE
 744            otherwise. Unsuccessful navigation will usually be treated
 745            as an end-of-file condition.
 746  Args    :
 747
 748
 749 =cut
 750
 751 sub _parse_header{
 752     my $self = shift;
 753
 754     # the first header line contains the column headers, see whether
 755     # we need them
 756     if ($self->keep_annotation) {
 757         my @colnames = $self->_get_row_values();
 758         # trim leading and trailing whitespace if desired
 759         if ($self->trim_values) {
 760             # trim off whitespace
 761             @colnames = map { $_ =~ s/^\s+//; $_ =~ s/\s+$//; $_; } @colnames;
 762             # trim off double quotes
 763             @colnames = map { $_ =~ s/^"//; $_ =~ s/"$//; $_; } @colnames;
 764         }
 765         # build or complete annotation column map
 766         my $annmap = $self->annotation_map || {};
 767         if (! %$annmap) {
 768             # check whether columns have been defined by name rather than index
 769             if (my $anncols = $self->annotation_columns) {
 770                 # first sanity check: all column names must map
 771                 my %colmap = map { ($_,1); } @colnames;
 772                 foreach my $col (@$anncols) {
 773                     if (!exists($colmap{$col})) {
 774                         $self->throw("no such column labeled '$col'");
 775                     }
 776                 }
 777                 # now map to the column indexes
 778                 %colmap = map { ($_,1); } @$anncols;
 779                 for (my $i = 0; $i < scalar(@colnames); $i++) {
 780                     if (exists($colmap{$colnames[$i]})) {
 781                         $annmap->{$i+1} = $colnames[$i];
 782                     }
 783                 }
 784             } else {
 785                 # no columns specified, default to all non-attribute columns
 786                 for (my $i = 0; $i < scalar(@colnames); $i++) {
 787                     $annmap->{$i+1} = $colnames[$i];
 788                 }
 789                 # subtract all attribute-referenced columns
 790                 foreach my $attrcol (values %{$self->attribute_map}) {
 791                     if ((!ref($attrcol)) && ($attrcol =~ /^\d+$/)) {
 792                         delete $annmap->{$attrcol};
 793                     }
 794                 }
 795             }
 796         } else {
 797             # fill in where the tag names weren't pre-defined
 798             for (my $i = 0; $i < scalar(@colnames); $i++) {
 799                 if (exists($annmap->{$i+1}) && ! defined($annmap->{$i+1})) {
 800                     $annmap->{$i+1} = $colnames[$i];
 801                 }
 802             }
 803         }
 804         $self->annotation_map($annmap);
 805     }
 806
 807     # now read past the header
 808     my $header_lines = $self->header;
 809     my $line_ok = 1;
 810     while (defined($line_ok) && ($header_lines > 0)) {
 811         $line_ok = $self->_next_record();
 812         $header_lines--;
 813     }
 814
 815     return $line_ok;
 816 }
 817
 818 =head2 _get_row_values
 819
 820  Title   : _get_row_values
 821  Usage   :
 822  Function: Get the values for the current line (or row) as an array in
 823            the order of columns.
 824
 825            This method is here is to serve as a hook for other formats
 826            that conceptually also represent tables but aren't
 827            formatted as row-based text files.
 828
 829  Example :
 830  Returns : An array of column values for the current row.
 831  Args    :
 832
 833
 834 =cut
 835
 836 sub _get_row_values{
 837     my $self = shift;
 838     my $delim = $self->delimiter;
 839     my $line = $self->{'_line'};
 840     chomp($line);
 841     my @cols = split(/$delim/,$line);
 842     return @cols;
 843 }
 844
 845 1;