Bio/SeqIO/table.pm

   1 #
   2 # BioPerl module for Bio::SeqIO::table
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Hilmar Lapp <hlapp at gmx.net>
   7 #
   8
   9 #
  10 # (c) Hilmar Lapp, hlapp at gmx.net, 2005.
  11 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2005.
  12 #
  13 # You may distribute this module under the same terms as perl itself.
  14 # Refer to the Perl Artistic License (see the license accompanying this
  15 # software package, or see http://www.perl.com/language/misc/Artistic.html)
  16 # for the terms under which you may use, modify, and redistribute this module.
  17 #
  18 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
  19 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  20 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  21 #
  22
  23 # POD documentation - main docs before the code
  24
  25 =head1 NAME
  26
  27 Bio::SeqIO::table - sequence input/output stream from a delimited table
  28
  29 =head1 SYNOPSIS
  30
  31   #It is probably best not to use this object directly, but
  32   #rather go through the SeqIO handler system. Go:
  33
  34   $stream = Bio::SeqIO->new(-file => $filename, -format => 'table');
  35
  36   while ( my $seq = $stream->next_seq() ) {
  37         # do something with $seq
  38   }
  39
  40 =head1 DESCRIPTION
  41
  42 This class transforms records in a table-formatted text file into
  43 Bio::Seq objects.
  44
  45 A table-formatted text file of sequence records for the purposes of
  46 this module is defined as a text file with each row corresponding to a
  47 sequence, and the attributes of the sequence being in different
  48 columns. Columns are delimited by a common delimiter, for instance tab
  49 or comma.
  50
  51 The module permits specifying which columns hold which type of
  52 annotation. The semantics of certain attributes, if present, are
  53 pre-defined, e.g., accession number and sequence. Additional
  54 attributes may be added to the annotation bundle.
  55
  56 =head1 FEEDBACK
  57
  58 =head2 Mailing Lists
  59
  60 User feedback is an integral part of the evolution of this and other
  61 Bioperl modules. Send your comments and suggestions preferably to one
  62 of the Bioperl mailing lists.  Your participation is much appreciated.
  63
  64   bioperl-l@bioperl.org                  - General discussion
  65   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  66
  67 =head2 Support
  68
  69 Please direct usage questions or support issues to the mailing list:
  70
  71 I<bioperl-l@bioperl.org>
  72
  73 rather than to the module maintainer directly. Many experienced and
  74 reponsive experts will be able look at the problem and quickly
  75 address it. Please include a thorough description of the problem
  76 with code and data examples if at all possible.
  77
  78 =head2 Reporting Bugs
  79
  80 Report bugs to the Bioperl bug tracking system to help us keep track
  81 the bugs and their resolution.
  82
  83 Bug reports can be submitted via email or the web:
  84
  85   https://github.com/bioperl/bioperl-live/issues
  86
  87 =head1 AUTHOR - Hilmar Lapp
  88
  89 Email hlapp at gmx.net
  90
  91 =head1 APPENDIX
  92
  93 The rest of the documentation details each of the object
  94 methods. Internal methods are usually preceded with a _
  95
  96 =cut
  97
  98 # Let the code begin...
  99
 100 package Bio::SeqIO::table;
 101 use strict;
 102
 103 use Bio::Species;
 104 use Bio::Seq::SeqFactory;
 105 use Bio::Annotation::Collection;
 106 use Bio::Annotation::SimpleValue;
 107
 108 use base qw(Bio::SeqIO);
 109
 110 =head2 new
 111
 112  Title   : new
 113  Usage   : $stream = Bio::SeqIO->new(-file => $filename, -format => 'table')
 114  Function: Returns a new seqstream
 115  Returns : A Bio::SeqIO stream for a table format
 116  Args    : Named parameters:
 117
 118              -file    name of file to read
 119              -fh      filehandle to attach to
 120              -comment leading character(s) introducing a comment line
 121              -header  the number of header lines to skip; the first
 122                       non-comment header line will be used to obtain
 123                       column names; column names will be used as the
 124                       default tags for attaching annotation.
 125              -delim   the delimiter for columns as a regular expression;
 126                       consecutive occurrences of the delimiter will
 127                       not be collapsed.
 128              -display_id the one-based index of the column containing
 129                       the display ID of the sequence
 130              -accession_number the one-based index of the column
 131                       containing the accession number of the sequence
 132              -seq     the one-based index of the column containing
 133                       the sequence string of the sequence
 134              -species the one-based index of the column containing the
 135                       species for the sequence record; if not a
 136                       number, will be used as the static species
 137                       common to all records
 138              -annotation if provided and a scalar (but see below), a
 139                       flag whether or not all additional columns are
 140                       to be preserved as annotation, the tags used
 141                       will either be 'colX' if there is no column
 142                       header and where X is the one-based column
 143                       index, and otherwise the column headers will be
 144                       used as tags;
 145
 146                       if a reference to an array, or a square
 147                       bracket-enclosed string of comma-delimited
 148                       values, only those columns (one-based index)
 149                       will be preserved as annotation, tags as before;
 150
 151                       if a reference to a hash, or a curly
 152                       braces-enclosed string of comma-delimited key
 153                       and value pairs in alternating order, the keys
 154                       are one-based column indexes to be preserved,
 155                       and the values are the tags under which the
 156                       annotation is to be attached; if not provided or
 157                       supplied as undef, no additional annotation will
 158                       be preserved.
 159              -colnames a reference to an array of column labels, or a
 160                       string of comma-delimited labels, denoting the
 161                       columns to be converted into annotation; this is
 162                       an alternative to -annotation and will be
 163                       ignored if -annotation is also supplied with a
 164                       valid value.
 165              -trim    flag determining whether or not all values should
 166                       be trimmed of leading and trailing white space
 167                       and double quotes
 168
 169            Additional arguments may be used to e.g. set factories and
 170            builders involved in the sequence object creation (see the
 171            POD of Bio::SeqIO).
 172
 173
 174 =cut
 175
 176 sub _initialize {
 177     my($self,@args) = @_;
 178
 179     # chained initialization
 180     $self->SUPER::_initialize(@args);
 181
 182     # our own parameters
 183     my ($cmtchars,
 184         $header,
 185         $delim,
 186         $display_id,
 187         $accnr,
 188         $seq,
 189         $taxon,
 190         $useann,
 191         $colnames,
 192         $trim) =
 193             $self->_rearrange([qw(COMMENT
 194                                   HEADER
 195                                   DELIM
 196                                   DISPLAY_ID
 197                                   ACCESSION_NUMBER
 198                                   SEQ
 199                                   SPECIES
 200                                   ANNOTATION
 201                                   COLNAMES
 202                                   TRIM)
 203                               ], @args);
 204
 205     # store options and apply defaults
 206     $self->comment_char(defined($cmtchars) ? $cmtchars : "#")
 207         if (!defined($self->comment_char)) || defined($cmtchars);
 208     $self->delimiter(defined($delim) ? $delim : "\t")
 209         if (!defined($self->delimiter)) || defined($delim);
 210     $self->header($header) if defined($header);
 211     $self->trim_values($trim) if defined($trim);
 212
 213     # attribute columns
 214     my $attrs = {};
 215     $attrs->{-display_id} = $display_id if defined($display_id);
 216     $attrs->{-accession_number} = $accnr if defined($accnr);
 217     $attrs->{-seq} = $seq if defined($seq);
 218     if (defined($taxon)) {
 219         if (ref($taxon) || ($taxon =~ /^\d+$/)) {
 220             # either a static object, or a column reference
 221             $attrs->{-species} = $taxon;
 222         } else {
 223             # static species as a string
 224             $attrs->{-species} = Bio::Species->new(
 225                 -classification => [reverse(split(' ',$taxon))]);
 226         }
 227     }
 228     $self->attribute_map($attrs);
 229
 230     # annotation columns, if any
 231     if ($useann && !ref($useann)) {
 232         # it's a scalar; check whether this is in fact an array or
 233         # hash as a string rather than just a flag
 234         if ($useann =~ /^\[(.*)\]$/) {
 235             $useann = [split(/[,;]/,$1)];
 236         } elsif ($useann =~ /^{(.*)}$/) {
 237             $useann = {split(/[,;]/,$1)};
 238         } # else it is probably indeed just a flag
 239     }
 240     if (ref($useann)) {
 241         my $ann_map;
 242         if (ref($useann) eq "ARRAY") {
 243             my $has_header = ($self->header > 0);
 244             $ann_map = {};
 245             foreach my $i (@$useann) {
 246                 $ann_map->{$i} = $has_header ? undef : "col$i";
 247             }
 248         } else {
 249             # no special handling necessary
 250             $ann_map = $useann;
 251         }
 252         $self->annotation_map($ann_map);
 253     } else {
 254         $self->keep_annotation($useann || $colnames);
 255         # annotation columns, if any
 256         if ($colnames && !ref($colnames)) {
 257             # an array as a string
 258             $colnames =~ s/^\[(.*)\]$/$1/;
 259             $colnames = [split(/[,;]/,$colnames)];
 260         }
 261         $self->annotation_columns($colnames) if ref($colnames);
 262     }
 263
 264     # make sure we have a factory defined
 265     if(!defined($self->sequence_factory)) {
 266         $self->sequence_factory(
 267             Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
 268                                       -type => 'Bio::Seq::RichSeq'));
 269     }
 270 }
 271
 272 =head2 next_seq
 273
 274  Title   : next_seq
 275  Usage   : $seq = $stream->next_seq()
 276  Function: returns the next sequence in the stream
 277  Returns : Bio::Seq::RichSeq object
 278  Args    :
 279
 280 =cut
 281
 282 sub next_seq {
 283     my $self = shift;
 284
 285     # skip until not a comment and not an empty line
 286     my $line_ok = $self->_next_record();
 287
 288     # if there is a header but we haven't read past it yet then do so now
 289     if ($line_ok && (! $self->_header_skipped) && $self->header) {
 290         $line_ok = $self->_parse_header();
 291         $self->_header_skipped(1);
 292     }
 293
 294     # return if we reached end-of-file
 295     return unless $line_ok;
 296
 297     # otherwise, parse the record
 298
 299     # split into columns
 300     my @cols = $self->_get_row_values();
 301     # trim leading and trailing whitespace and quotes if desired
 302     if ($self->trim_values) {
 303         for(my $i = 0; $i < scalar(@cols); $i++) {
 304             if ($cols[$i]) {
 305                 # trim off whitespace
 306                 $cols[$i] =~ s/^\s+//;
 307                 $cols[$i] =~ s/\s+$//;
 308                 # trim off double quotes
 309                 $cols[$i] =~ s/^"//;
 310                 $cols[$i] =~ s/"$//;
 311             }
 312         }
 313     }
 314
 315     # assign values for columns in the attribute map
 316     my $attrmap = $self->_attribute_map;
 317     my %params = ();
 318     foreach my $attr (keys %$attrmap) {
 319         if ((!ref($attrmap->{$attr})) && ($attrmap->{$attr} =~ /^\d+$/)) {
 320             # this is a column index, add to instantiation parameters
 321             $params{$attr} = $cols[$attrmap->{$attr}];
 322         } else {
 323             # not a column index; we assume it's a static value
 324             $params{$attr} = $attrmap->{$attr};
 325         }
 326     }
 327
 328     # add annotation columns to the annotation bundle
 329     my $annmap = $self->_annotation_map;
 330     if ($annmap && %$annmap) {
 331         my $anncoll = Bio::Annotation::Collection->new();
 332         foreach my $col (keys %$annmap) {
 333             next unless $cols[$col]; # skip empty columns!
 334             $anncoll->add_Annotation(
 335                 Bio::Annotation::SimpleValue->new(-value  => $cols[$col],
 336                                                   -tagname=> $annmap->{$col}));
 337         }
 338         $params{'-annotation'} = $anncoll;
 339     }
 340
 341     # ask the object builder to add the slots that we've gathered
 342     my $builder = $self->sequence_builder();
 343     $builder->add_slot_value(%params);
 344     # and instantiate the object
 345     my $seq = $builder->make_object();
 346
 347     # done!
 348     return $seq;
 349 }
 350
 351 =head2 comment_char
 352
 353  Title   : comment_char
 354  Usage   : $obj->comment_char($newval)
 355  Function: Get/set the leading character(s) designating a line as
 356            a comment-line.
 357  Example :
 358  Returns : value of comment_char (a scalar)
 359  Args    : on set, new value (a scalar or undef, optional)
 360
 361
 362 =cut
 363
 364 sub comment_char{
 365     my $self = shift;
 366
 367     return $self->{'comment_char'} = shift if @_;
 368     return $self->{'comment_char'};
 369 }
 370
 371 =head2 header
 372
 373  Title   : header
 374  Usage   : $obj->header($newval)
 375  Function: Get/set the number of header lines to skip before the
 376            rows containing actual sequence records.
 377
 378            If set to zero or undef, means that there is no header and
 379            therefore also no column headers.
 380
 381  Example :
 382  Returns : value of header (a scalar)
 383  Args    : on set, new value (a scalar or undef, optional)
 384
 385
 386 =cut
 387
 388 sub header{
 389     my $self = shift;
 390
 391     return $self->{'header'} = shift if @_;
 392     return $self->{'header'};
 393 }
 394
 395 =head2 delimiter
 396
 397  Title   : delimiter
 398  Usage   : $obj->delimiter($newval)
 399  Function: Get/set the column delimiter. This will in fact be
 400            treated as a regular expression. Consecutive occurrences
 401            will not be collapsed to a single one.
 402
 403  Example :
 404  Returns : value of delimiter (a scalar)
 405  Args    : on set, new value (a scalar or undef, optional)
 406
 407
 408 =cut
 409
 410 sub delimiter{
 411     my $self = shift;
 412
 413     return $self->{'delimiter'} = shift if @_;
 414     return $self->{'delimiter'};
 415 }
 416
 417 =head2 attribute_map
 418
 419  Title   : attribute_map
 420  Usage   : $obj->attribute_map($newval)
 421  Function: Get/set the map of sequence object initialization
 422            attributes (keys) to one-based column index.
 423
 424            Attributes will usually need to be prefixed by a dash, just
 425            as if they were passed to the new() method of the sequence
 426            class.
 427
 428  Example :
 429  Returns : value of attribute_map (a reference to a hash)
 430  Args    : on set, new value (a reference to a hash or undef, optional)
 431
 432
 433 =cut
 434
 435 sub attribute_map{
 436     my $self = shift;
 437
 438     # internally we store zero-based maps - so we need to convert back
 439     # and forth here
 440     if (@_) {
 441         my $arg = shift;
 442         # allow for and protect against undef
 443         return delete $self->{'_attribute_map'} unless defined($arg);
 444         # copy to avoid side-effects
 445         my $attr_map = {%$arg};
 446         foreach my $key (keys %$attr_map) {
 447             if ((!ref($attr_map->{$key})) && ($attr_map->{$key} =~ /^\d+$/)) {
 448                 $attr_map->{$key}--;
 449             }
 450         }
 451         $self->{'_attribute_map'} = $attr_map;
 452     }
 453     # there may not be a map
 454     return unless exists($self->{'_attribute_map'});
 455     # we need to copy in order not to override the stored map!
 456     my %attr_map = %{$self->{'_attribute_map'}};
 457     foreach my $key (keys %attr_map) {
 458         if ((!ref($attr_map{$key})) && ($attr_map{$key} =~ /^\d+$/)) {
 459             $attr_map{$key}++;
 460         }
 461     }
 462     return \%attr_map;
 463 }
 464
 465 =head2 annotation_map
 466
 467  Title   : annotation_map
 468  Usage   : $obj->annotation_map($newval)
 469  Function: Get/set the mapping between one-based column indexes
 470            (keys) and annotation tags (values).
 471
 472            Note that the map returned by this method may change after
 473            the first next_seq() call if the file contains a column
 474            header and no annotation keys have been predefined in the
 475            map, because upon reading the column header line the tag
 476            names will be set automatically.
 477
 478            Note also that the map may reference columns that are used
 479            as well in the sequence attribute map.
 480
 481  Example :
 482  Returns : value of annotation_map (a reference to a hash)
 483  Args    : on set, new value (a reference to a hash or undef, optional)
 484
 485
 486 =cut
 487
 488 sub annotation_map{
 489     my $self = shift;
 490
 491     # internally we store zero-based maps - so we need to convert back
 492     # and forth here
 493     if (@_) {
 494         my $arg = shift;
 495         # allow for and protect against undef
 496         return delete $self->{'_annotation_map'} unless defined($arg);
 497         # copy to avoid side-effects
 498         my $ann_map = {%$arg};
 499         # make sure we sort the keys numerically or otherwise we may
 500         # clobber a key with a higher index
 501         foreach my $key (sort { $a <=> $b } keys(%$ann_map)) {
 502             $ann_map->{$key-1} = $ann_map->{$key};
 503             delete $ann_map->{$key};
 504         }
 505         $self->{'_annotation_map'} = $ann_map;
 506         # also make a note that we want to keep annotation
 507         $self->keep_annotation(1);
 508     }
 509     # there may not be a map
 510     return unless exists($self->{'_annotation_map'});
 511     # we need to copy in order not to override the stored map!
 512     my %ann_map = %{$self->{'_annotation_map'}};
 513     # here we need to sort numerically in reverse order ...
 514     foreach my $key (sort { $b <=> $a } keys(%ann_map)) {
 515         $ann_map{$key+1} = $ann_map{$key};
 516         delete $ann_map{$key};
 517     }
 518     return \%ann_map;
 519 }
 520
 521 =head2 keep_annotation
 522
 523  Title   : keep_annotation
 524  Usage   : $obj->keep_annotation($newval)
 525  Function: Get/set flag whether or not to keep values from
 526            additional columns as annotation.
 527
 528            Additional columns are all those columns in the input file
 529            that aren't referenced in the attribute map.
 530
 531  Example :
 532  Returns : value of keep_annotation (a scalar)
 533  Args    : on set, new value (a scalar or undef, optional)
 534
 535
 536 =cut
 537
 538 sub keep_annotation{
 539     my $self = shift;
 540
 541     return $self->{'keep_annotation'} = shift if @_;
 542     return $self->{'keep_annotation'};
 543 }
 544
 545 =head2 annotation_columns
 546
 547  Title   : annotation_columns
 548  Usage   : $obj->annotation_columns($newval)
 549  Function: Get/set the names (labels) of the columns to be used for
 550            annotation.
 551
 552            This is an alternative to using annotation_map. In order to
 553            have any effect, it must be set before the first call of
 554            next_seq(), and obviously there must be a header line (or
 555            row) too giving the column labels.
 556
 557  Example :
 558  Returns : value of annotation_columns (a reference to an array)
 559  Args    : on set, new value (a reference to an array of undef, optional)
 560
 561
 562 =cut
 563
 564 sub annotation_columns{
 565     my $self = shift;
 566
 567     return $self->{'annotation_columns'} = shift if @_;
 568     return $self->{'annotation_columns'};
 569 }
 570
 571 =head2 trim_values
 572
 573  Title   : trim_values
 574  Usage   : $obj->trim_values($newval)
 575  Function: Get/set whether or not to trim leading and trailing
 576            whitespace off all column values.
 577  Example :
 578  Returns : value of trim_values (a scalar)
 579  Args    : on set, new value (a scalar or undef, optional)
 580
 581
 582 =cut
 583
 584 sub trim_values{
 585     my $self = shift;
 586
 587     return $self->{'trim_values'} = shift if @_;
 588     return $self->{'trim_values'};
 589 }
 590
 591 =head1 Internal methods
 592
 593 All methods with a leading underscore are not meant to be part of the
 594 'official' API. They are for use by this module only, consider them
 595 private unless you are a developer trying to modify this module.
 596
 597 =cut
 598
 599 =head2 _attribute_map
 600
 601  Title   : _attribute_map
 602  Usage   : $obj->_attribute_map($newval)
 603  Function: Get only. Same as attribute_map, but zero-based indexes.
 604
 605            Note that any changes made to the returned map will change
 606            the map used by this instance. You should know what you are
 607            doing if you modify the returned value (or if you call this
 608            method in the first place).
 609
 610  Example :
 611  Returns : value of _attribute_map (a reference to a hash)
 612  Args    : none
 613
 614
 615 =cut
 616
 617 sub _attribute_map{
 618     my $self = shift;
 619
 620     return $self->{'_attribute_map'};
 621 }
 622
 623 =head2 _annotation_map
 624
 625  Title   : _annotation_map
 626  Usage   : $obj->_annotation_map($newval)
 627  Function: Get only. Same as annotation_map, but with zero-based indexes.
 628
 629            Note that any changes made to the returned map will change
 630            the map used by this instance. You should know what you are
 631            doing if you modify the returned value (or if you call this
 632            method in the first place).
 633
 634  Example :
 635  Returns : value of _annotation_map (a reference to a hash)
 636  Args    : none
 637
 638
 639 =cut
 640
 641 sub _annotation_map{
 642     my $self = shift;
 643
 644     return $self->{'_annotation_map'};
 645 }
 646
 647 =head2 _header_skipped
 648
 649  Title   : _header_skipped
 650  Usage   : $obj->_header_skipped($newval)
 651  Function: Get/set the flag whether the header was already
 652            read (and skipped) or not.
 653  Example :
 654  Returns : value of _header_skipped (a scalar)
 655  Args    : on set, new value (a scalar or undef, optional)
 656
 657
 658 =cut
 659
 660 sub _header_skipped{
 661     my $self = shift;
 662
 663     return $self->{'_header_skipped'} = shift if @_;
 664     return $self->{'_header_skipped'};
 665 }
 666
 667 =head2 _next_record
 668
 669  Title   : _next_record
 670  Usage   :
 671  Function: Navigates the underlying file to the next record.
 672
 673            For row-based records in delimited text files, this will
 674            skip all empty lines and lines with a leading comment
 675            character.
 676
 677            This method is here is to serve as a hook for other formats
 678            that conceptually also represent tables but aren't
 679            formatted as row-based text files.
 680
 681  Example :
 682  Returns : TRUE if the navigation was successful and FALSE
 683            otherwise. Unsuccessful navigation will usually be treated
 684            as an end-of-file condition.
 685  Args    :
 686
 687
 688 =cut
 689
 690 sub _next_record{
 691     my $self = shift;
 692
 693     my $cmtcc = $self->comment_char;
 694     my $line = $self->_readline();
 695
 696     # skip until not a comment and not an empty line
 697     while (defined($line)
 698            && (($cmtcc && ($line =~ /^\s*$cmtcc/))
 699                || ($line =~ /^\s*$/))) {
 700         $line = $self->_readline();
 701     }
 702
 703     return $self->{'_line'} = $line;
 704 }
 705
 706 =head2 _parse_header
 707
 708  Title   : _parse_header
 709  Usage   :
 710  Function: Parse the table header and navigate past it.
 711
 712            This method is called if the number of header rows has been
 713            specified equal to or greater than one, and positioned at
 714            the first header line (row). By default the first header
 715            line (row) is used for setting column names, but additional
 716            lines (rows) may be skipped too. Empty lines and comment
 717            lines do not count as header lines (rows).
 718
 719            This method will call _next_record() to navigate to the
 720            next header line (row), if there is more than one header
 721            line (row). Upon return, the file is presumed to be
 722            positioned at the first record after the header.
 723
 724            This method is here is to serve as a hook for other formats
 725            that conceptually also represent tables but aren't
 726            formatted as row-based text files.
 727
 728            Note however that the only methods used to access file
 729            content or navigate the position are _get_row_values() and
 730            _next_record(), so it should usually suffice to override
 731            those.
 732
 733  Example :
 734  Returns : TRUE if navigation past the header was successful and FALSE
 735            otherwise. Unsuccessful navigation will usually be treated
 736            as an end-of-file condition.
 737  Args    :
 738
 739
 740 =cut
 741
 742 sub _parse_header{
 743     my $self = shift;
 744
 745     # the first header line contains the column headers, see whether
 746     # we need them
 747     if ($self->keep_annotation) {
 748         my @colnames = $self->_get_row_values();
 749         # trim leading and trailing whitespace if desired
 750         if ($self->trim_values) {
 751             # trim off whitespace
 752             @colnames = map { $_ =~ s/^\s+//; $_ =~ s/\s+$//; $_; } @colnames;
 753             # trim off double quotes
 754             @colnames = map { $_ =~ s/^"//; $_ =~ s/"$//; $_; } @colnames;
 755         }
 756         # build or complete annotation column map
 757         my $annmap = $self->annotation_map || {};
 758         if (! %$annmap) {
 759             # check whether columns have been defined by name rather than index
 760             if (my $anncols = $self->annotation_columns) {
 761                 # first sanity check: all column names must map
 762                 my %colmap = map { ($_,1); } @colnames;
 763                 foreach my $col (@$anncols) {
 764                     if (!exists($colmap{$col})) {
 765                         $self->throw("no such column labeled '$col'");
 766                     }
 767                 }
 768                 # now map to the column indexes
 769                 %colmap = map { ($_,1); } @$anncols;
 770                 for (my $i = 0; $i < scalar(@colnames); $i++) {
 771                     if (exists($colmap{$colnames[$i]})) {
 772                         $annmap->{$i+1} = $colnames[$i];
 773                     }
 774                 }
 775             } else {
 776                 # no columns specified, default to all non-attribute columns
 777                 for (my $i = 0; $i < scalar(@colnames); $i++) {
 778                     $annmap->{$i+1} = $colnames[$i];
 779                 }
 780                 # subtract all attribute-referenced columns
 781                 foreach my $attrcol (values %{$self->attribute_map}) {
 782                     if ((!ref($attrcol)) && ($attrcol =~ /^\d+$/)) {
 783                         delete $annmap->{$attrcol};
 784                     }
 785                 }
 786             }
 787         } else {
 788             # fill in where the tag names weren't pre-defined
 789             for (my $i = 0; $i < scalar(@colnames); $i++) {
 790                 if (exists($annmap->{$i+1}) && ! defined($annmap->{$i+1})) {
 791                     $annmap->{$i+1} = $colnames[$i];
 792                 }
 793             }
 794         }
 795         $self->annotation_map($annmap);
 796     }
 797
 798     # now read past the header
 799     my $header_lines = $self->header;
 800     my $line_ok = 1;
 801     while (defined($line_ok) && ($header_lines > 0)) {
 802         $line_ok = $self->_next_record();
 803         $header_lines--;
 804     }
 805
 806     return $line_ok;
 807 }
 808
 809 =head2 _get_row_values
 810
 811  Title   : _get_row_values
 812  Usage   :
 813  Function: Get the values for the current line (or row) as an array in
 814            the order of columns.
 815
 816            This method is here is to serve as a hook for other formats
 817            that conceptually also represent tables but aren't
 818            formatted as row-based text files.
 819
 820  Example :
 821  Returns : An array of column values for the current row.
 822  Args    :
 823
 824
 825 =cut
 826
 827 sub _get_row_values{
 828     my $self = shift;
 829     my $delim = $self->delimiter;
 830     my $line = $self->{'_line'};
 831     chomp($line);
 832     my @cols = split(/$delim/,$line);
 833     return @cols;
 834 }
 835
 836 1;