Bio/AlignIO/stockholm.pm

   1 #
   2 # BioPerl module for Bio::AlignIO::stockholm
   3 #
   4 #   Based on the Bio::SeqIO::stockholm module
   5 #       by Ewan Birney <birney@ebi.ac.uk>
   6 #       and Lincoln Stein  <lstein@cshl.org>
   7 #
   8 #       and the SimpleAlign.pm module of Ewan Birney
   9 #
  10 # Copyright Peter Schattner, Chris Fields
  11 #
  12 # You may distribute this module under the same terms as perl itself
  13 # _history
  14 # September 5, 2000
  15 # November 6, 2006 - completely refactor read_aln(), add write_aln()
  16 # POD documentation - main docs before the code
  17
  18 =head1 NAME
  19
  20 Bio::AlignIO::stockholm - stockholm sequence input/output stream
  21
  22 =head1 SYNOPSIS
  23
  24   # Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
  25
  26   use Bio::AlignIO;
  27   use strict;
  28
  29   my $in = Bio::AlignIO->new(-format => 'stockholm',
  30                              -file   => 't/data/testaln.stockholm');
  31   while( my $aln = $in->next_aln ) {
  32
  33   }
  34
  35 =head1 DESCRIPTION
  36
  37 This object can transform L<Bio::Align::AlignI> objects to and from
  38 stockholm flat file databases.  This has been completely refactored
  39 from the original stockholm parser to handle annotation data and now
  40 includes a write_aln() method for (almost) complete stockholm
  41 format output.
  42
  43 Stockholm alignment records normally contain additional sequence-based
  44 and alignment-based annotation
  45
  46   GF Lines (alignment feature/annotation):
  47   #=GF <featurename> <Generic per-file annotation, free text>
  48   Placed above the alignment
  49
  50   GC Lines (Alignment consensus)
  51   #=GC <featurename> <Generic per-column annotation, exactly 1
  52        character per column>
  53   Placed below the alignment
  54
  55   GS Lines (Sequence annotations)
  56   #=GS <seqname> <featurename> <Generic per-sequence annotation, free
  57        text>
  58
  59   GR Lines (Sequence meta data)
  60   #=GR <seqname> <featurename> <Generic per-sequence AND per-column
  61        mark up, exactly 1 character per column>
  62
  63 Currently, sequence annotations (those designated with GS tags) are
  64 parsed only for accession numbers and descriptions.  It is intended that
  65 full parsing will be added at some point in the near future along with
  66 a builder option for optionally parsing alignment annotation and meta data.
  67
  68 The following methods/tags are currently used for storing and writing
  69 the alignment annotation data.
  70
  71     Tag        SimpleAlign
  72                  Method
  73     ----------------------------------------------------------------------
  74      AC        accession
  75      ID        id
  76      DE        description
  77     ----------------------------------------------------------------------
  78
  79     Tag        Bio::Annotation   TagName                    Parameters
  80                Class
  81     ----------------------------------------------------------------------
  82      AU        SimpleValue       record_authors             value
  83      SE        SimpleValue       seed_source                value
  84      GA        SimpleValue       gathering_threshold        value
  85      NC        SimpleValue       noise_cutoff               value
  86      TC        SimpleValue       trusted_cutoff             value
  87      TP        SimpleValue       entry_type                 value
  88      SQ        SimpleValue       num_sequences              value
  89      PI        SimpleValue       previous_ids               value
  90      DC        Comment           database_comment           comment
  91      CC        Comment           alignment_comment          comment
  92      DR        Target            dblink                     database
  93                                                             primary_id
  94                                                             comment
  95      AM        SimpleValue       build_method               value
  96      NE        SimpleValue       pfam_family_accession      value
  97      NL        SimpleValue       sequence_start_stop        value
  98      SS        SimpleValue       sec_structure_source       value
  99      BM        SimpleValue       build_model                value
 100      RN        Reference         reference                  *
 101      RC        Reference         reference                  comment
 102      RM        Reference         reference                  pubmed
 103      RT        Reference         reference                  title
 104      RA        Reference         reference                  authors
 105      RL        Reference         reference                  location
 106     ----------------------------------------------------------------------
 107   * RN is generated based on the number of Bio::Annotation::Reference objects
 108
 109 =head2 Custom annotation
 110
 111 Some users may want to add custom annotation beyond those mapped above.
 112 Currently there are two methods to do so; however, the methods used for adding
 113 such annotation may change in the future, particularly if alignment Writer
 114 classes are introduced. In particular, do not rely on changing the global
 115 variables @WRITEORDER or %WRITEMAP as these may be made private at some point.
 116
 117 1) Use (and abuse) the 'custom' tag.  The tagname for the object can differ
 118 from the tagname used to store the object in the AnnotationCollection.
 119
 120     # AnnotationCollection from the SimpleAlign object
 121     my $coll = $aln->annotation;
 122     my $factory = Bio::Annotation::AnnotationFactory->new(-type =>
 123         Bio::Annotation::SimpleValue');
 124     my $rfann = $factory->create_object(-value => $str,
 125                                         -tagname => 'mytag');
 126     $coll->add_Annotation('custom', $rfann);
 127     $rfann = $factory->create_object(-value => 'foo',
 128                                     -tagname => 'bar');
 129     $coll->add_Annotation('custom', $rfann);
 130
 131 OUTPUT:
 132
 133     # STOCKHOLM 1.0
 134
 135     #=GF ID myID12345
 136     #=GF mytag katnayygqelggvnhdyddlakfyfgaglealdffnnkeaaakiinwvaEDTTRGKIQDLV??
 137     #=GF mytag TPtd~????LDPETQALLV???????????????????????NAIYFKGRWE?????????~??
 138     #=GF mytag ??HEF?A?EMDTKPY??DFQH?TNen?????GRI??????V???KVAM??MF?????????N??
 139     #=GF mytag ???DD?VFGYAEL????DE???????L??D??????A??TALELAY??????????????????
 140     #=GF mytag ?????????????KG??????Sa???TSMLILLP???????????????D??????????????
 141     #=GF mytag ???????????EGTr?????AGLGKLLQ??QL????????SREef??DLNK??L???AH????R
 142     #=GF mytag ????????????L????????????????????????????????????????R?????????R
 143     #=GF mytag ??QQ???????V???????AVRLPKFSFefefdlkeplknlgmhqafdpnsdvfklmdqavlvi
 144     #=GF mytag gdlqhayafkvd????????????????????????????????????????????????????
 145     #=GF mytag ????????????????????????????????????????????????????????????????
 146     #=GF mytag ????????????????????????????????????????????????????????????????
 147     #=GF mytag ????????????????????????????????????????????????????????????????
 148     #=GF mytag ?????????????INVDEAG?TEAAAATAAKFVPLSLppkt??????????????????PIEFV
 149     #=GF mytag ADRPFAFAIR??????E?PAT?G????SILFIGHVEDPTP?msv?
 150     #=GF bar foo
 151     ...
 152
 153 2) Modify the global @WRITEORDER and %WRITEMAP.
 154
 155     # AnnotationCollection from the SimpleAlign object
 156     my $coll = $aln->annotation;
 157
 158     # add to WRITEORDER
 159     my @order = @Bio::AlignIO::stockholm::WRITEORDER;
 160     push @order, 'my_stuff';
 161     @Bio::AlignIO::stockholm::WRITEORDER = @order;
 162
 163     # make sure new tag maps to something
 164     $Bio::AlignIO::stockholm::WRITEMAP{my_stuff} = 'Hobbit/SimpleValue';
 165
 166     my $rfann = $factory->create_object(-value => 'Frodo',
 167                                         -tagname => 'Hobbit');
 168     $coll->add_Annotation('my_stuff', $rfann);
 169     $rfann = $factory->create_object(-value => 'Bilbo',
 170                                      -tagname => 'Hobbit');
 171     $coll->add_Annotation('my_stuff', $rfann);
 172
 173 OUTPUT:
 174
 175     # STOCKHOLM 1.0
 176
 177     #=GF ID myID12345
 178     #=GF Hobbit Frodo
 179     #=GF Hobbit Bilbo
 180     ....
 181
 182 =head1 FEEDBACK
 183
 184 =head2 Support
 185
 186 Please direct usage questions or support issues to the mailing list:
 187
 188 I<bioperl-l@bioperl.org>
 189
 190 rather than to the module maintainer directly. Many experienced and
 191 reponsive experts will be able look at the problem and quickly
 192 address it. Please include a thorough description of the problem
 193 with code and data examples if at all possible.
 194
 195 =head2 Reporting Bugs
 196
 197 Report bugs to the Bioperl bug tracking system to help us keep track
 198 the bugs and their resolution.  Bug reports can be submitted via the
 199 web:
 200
 201   https://redmine.open-bio.org/projects/bioperl/
 202
 203 =head1 AUTHORS - Chris Fields, Peter Schattner
 204
 205 Email: cjfields-at-uiuc-dot-edu, schattner@alum.mit.edu
 206
 207 =head1 CONTRIBUTORS
 208
 209 Andreas Kahari, ak-at-ebi.ac.uk
 210 Jason Stajich, jason-at-bioperl.org
 211
 212 =head1 APPENDIX
 213
 214 The rest of the documentation details each of the object
 215 methods. Internal methods are usually preceded with a _
 216
 217 =cut
 218
 219 # Let the code begin...
 220
 221 package Bio::AlignIO::stockholm;
 222 use strict;
 223
 224 use Bio::Seq::Meta;
 225 use Bio::AlignIO::Handler::GenericAlignHandler;
 226 use Text::Wrap qw(wrap);
 227
 228 use base qw(Bio::AlignIO);
 229
 230 my $STKVERSION = 'STOCKHOLM 1.0';
 231
 232 # This maps the two-letter annotation key to a Annotation/parameter/tagname
 233 # combination.  Some data is stored using get/set methods ('Methods')  The rest
 234 # is mapped to Annotation objects using the parameter for the parsed data
 235 # and the tagname for, well, the Annotation tagname.  A few are treated differently
 236 # based on the type of data stored (Reference data in particular).
 237
 238 my %MAPPING = (
 239     'AC'    =>  'ACCESSION',
 240     'ID'    =>  'ID',
 241     'DE'    =>  ['DESCRIPTION' => 'DESCRIPTION'],
 242     'AU'    =>  ['RECORD_AUTHORS' => 'RECORD_AUTHORS'],
 243     'SE'    =>  'SEED_SOURCE',
 244     'BM'    =>  'BUILD_COMMAND',
 245     'GA'    =>  'GATHERING_THRESHOLD',
 246     'NC'    =>  'NOISE_CUTOFF',
 247     'TC'    =>  'TRUSTED_CUTOFF',
 248     'TP'    =>  'ENTRY_TYPE',
 249     'SQ'    =>  'NUM_SEQUENCES',
 250     'PI'    =>  'PREVIOUS_IDS',
 251     'DC'    =>  ['DATABASE_COMMENT' => 'DATABASE_COMMENT'],
 252     'DR'    =>  'DBLINK',
 253     'RN'    =>  ['REFERENCE' => 'REFERENCE'],
 254     'RC'    =>  ['REFERENCE' => 'COMMENT'],
 255     'RM'    =>  ['REFERENCE' => 'PUBMED'],
 256     'RT'    =>  ['REFERENCE' => 'TITLE'],
 257     'RA'    =>  ['REFERENCE' => 'AUTHORS'],
 258     'RL'    =>  ['REFERENCE' => 'JOURNAL'],
 259     'CC'    =>  ['ALIGNMENT_COMMENT' => 'ALIGNMENT_COMMENT'],
 260     #Pfam-specific
 261     'AM'    =>  'BUILD_METHOD',
 262     'NE'    =>  'PFAM_FAMILY_ACCESSION',
 263     'NL'    =>  'SEQ_START_STOP',
 264     # Rfam-specific GF lines
 265     #'SS'    =>  'SEC_STRUCTURE_SOURCE',
 266     'SEQUENCE' => 'SEQUENCE'
 267 );
 268
 269 # this is the order that annotations are written
 270 our @WRITEORDER = qw(accession
 271   id
 272   description
 273   previous_ids
 274   record_authors
 275   seed_source
 276   sec_structure_source
 277   gathering_threshold
 278   trusted_cutoff
 279   noise_cutoff
 280   entry_type
 281   build_command
 282   build_method
 283   pfam_family_accession
 284   seq_start_stop
 285   reference
 286   database_comment
 287   custom
 288   dblink
 289   alignment_comment
 290   num_sequences
 291   seq_annotation
 292   );
 293
 294 # This maps the tagname back to a tagname-annotation value combination.
 295 # Some data is stored using get/set methods ('Methods'), others
 296 # are mapped b/c of more complex annotation types.
 297
 298 our %WRITEMAP = (
 299     'accession'             =>  'AC/Method',
 300     'id'                    =>  'ID/Method',
 301     'description'           =>  'DE/Method',
 302     'record_authors'        =>  'AU/SimpleValue',
 303     'seed_source'           =>  'SE/SimpleValue',
 304     'build_command'         =>  'BM/SimpleValue',
 305     'gathering_threshold'   =>  'GA/SimpleValue',
 306     'noise_cutoff'          =>  'NC/SimpleValue',
 307     'trusted_cutoff'        =>  'TC/SimpleValue',
 308     'entry_type'            =>  'TP/SimpleValue',
 309     'num_sequences'         =>  'SQ/SimpleValue',
 310     'previous_ids'          =>  'PI/SimpleValue',
 311     'database_comment'      =>  'DC/SimpleValue',
 312     'dblink'                =>  'DR/DBLink',
 313     'reference'             =>  'RX/Reference',
 314     'ref_number'            =>  'RN/number',
 315     'ref_comment'           =>  'RC/comment',
 316     'ref_pubmed'            =>  'RM/pubmed',
 317     'ref_title'             =>  'RT/title',
 318     'ref_authors'           =>  'RA/authors',
 319     'ref_location'          =>  'RL/location',
 320     'alignment_comment'     =>  'CC/Comment',
 321     'seq_annotation'        =>  'DR/Collection',
 322     #Pfam-specific
 323     'build_method'          =>  'AM/SimpleValue',
 324     'pfam_family_accession' =>  'NE/SimpleValue',
 325     'seq_start_stop'        =>  'NL/SimpleValue',
 326     # Rfam-specific GF lines
 327     'sec_structure_source'  =>  'SS/SimpleValue',
 328     # custom; this is used to carry over anything from the input alignment
 329     # not mapped to LocatableSeqs or SimpleAlign in a meaningful way
 330     'custom'                =>  'XX/SimpleValue'
 331 );
 332
 333 # This maps the tagname back to a tagname-annotation value combination.
 334 # Some data is stored using get/set methods ('Methods'), others
 335 # are mapped b/c of more complex annotation types.
 336
 337 =head2 new
 338
 339  Title   : new
 340  Usage   : my $alignio = Bio::AlignIO->new(-format => 'stockholm'
 341                                           -file   => '>file');
 342  Function: Initialize a new L<Bio::AlignIO::stockholm> reader or writer
 343  Returns : L<Bio::AlignIO> object
 344  Args    : -line_length :  length of the line for the alignment block
 345            -alphabet    :  symbol alphabet to set the sequences to.  If not set,
 346                            the parser will try to guess based on the alignment
 347                            accession (if present), defaulting to 'dna'.
 348            -spaces      :  (optional, def = 1) boolean to add a space in between
 349                            the "# STOCKHOLM 1.0" header and the annotation and
 350                            the annotation and the alignment.
 351
 352 =cut
 353
 354 sub _initialize {
 355     my ( $self, @args ) = @_;
 356     $self->SUPER::_initialize(@args);
 357     my ($handler, $linelength, $spaces) = $self->_rearrange([qw(HANDLER LINE_LENGTH SPACES)],@args);
 358     $spaces = defined $spaces ? $spaces : 1;
 359     $self->spaces($spaces);
 360     # hash for functions for decoding keys.
 361     $handler ? $self->alignhandler($handler) :
 362     $self->alignhandler(Bio::AlignIO::Handler::GenericAlignHandler->new(
 363                     -format => 'stockholm',
 364                     -verbose => $self->verbose,
 365                     ));
 366     $linelength && $self->line_length($linelength);
 367 }
 368
 369 =head2 next_aln
 370
 371  Title   : next_aln
 372  Usage   : $aln = $stream->next_aln()
 373  Function: returns the next alignment in the stream.
 374  Returns : L<Bio::Align::AlignI> object
 375  Args    : NONE
 376
 377 =cut
 378
 379 sub next_aln {
 380     my $self = shift;
 381
 382     my $handler = $self->alignhandler;
 383     # advance to alignment header
 384     while( defined(my $line = $self->_readline) ) {
 385         if ($line =~ m{^\#\s*STOCKHOLM\s+}xmso) {
 386             last;
 387         }
 388     }
 389
 390     $self->{block_line} = 0;
 391     # go into main body of alignment
 392     my ($data_chunk, $isa_primary, $name, $alphabet);
 393     my $last_feat = '';
 394     while( defined(my $line = $self->_readline) ) {
 395         # only blank lines are in between blocks, so reset block line
 396         my ($primary_tag, $secondary_tag, $data, $nse, $feat, $align, $concat);
 397         if ($line =~ m{^\s*$}xmso) {
 398             $self->{block_line} &&= 0;
 399             next;
 400         }
 401
 402         # End of Record
 403         if (index($line, '//') == 0) {
 404             # fencepost
 405             $handler->data_handler($data_chunk);
 406             undef $data_chunk;
 407             $handler->data_handler({ALIGNMENT => 1,
 408                                     NAME => 'ALPHABET',
 409                                     DATA => $self->alphabet})
 410                 if $self->alphabet;
 411             last;
 412         }
 413         elsif ($line =~ m{^\#=([A-Z]{2})\s+([^\n]+?)\s*$}xmso) {
 414             ($primary_tag, $data) = ($1, $2);
 415             if ($primary_tag eq 'GS' || $primary_tag eq 'GR') {
 416                 ($nse, $feat, $data) = split(/\s+/, $data, 3);
 417             } else {
 418                 ($feat, $data) = split(/\s+/, $data, 2);
 419             }
 420             $align = ($primary_tag eq 'GF' || $primary_tag eq 'GR') ? 1 : 0;
 421         }
 422         elsif ($line =~ m{^(\S+)\s+([^\s]+)\s*}) {
 423             $self->{block_line}++;
 424             ($feat, $nse, $data) = ('SEQUENCE', $1, $2);
 425         }
 426         else {
 427             $self->debug("Missed line : $line\n");
 428         }
 429         $primary_tag ||= ''; # when no #= line is present
 430         $align ||= 0;
 431
 432         # array refs where the two values are equal indicate the start of a
 433         # primary chunk of data, otherwise it is to be folded into the last
 434         # data chunk under a secondary tag.  These are also concatenated
 435         # to previous values if the
 436
 437         if (exists($MAPPING{$feat}) && ref $MAPPING{$feat} eq 'ARRAY') {
 438             ($name, $secondary_tag, $isa_primary) = ( $MAPPING{$feat}->[0] eq $MAPPING{$feat}->[1] ) ?
 439                 ($MAPPING{$feat}->[0], 'DATA', 1) :
 440                 (@{ $MAPPING{$feat} }, 0) ;
 441             $concat = $last_feat eq $feat ? 1 : 0;
 442         } elsif (exists($MAPPING{$feat})) {
 443             ($name, $secondary_tag, $isa_primary) = ($MAPPING{$feat}, 'DATA', 1);
 444             # catch alphabet here if possible
 445             if ($align && $name eq 'ACCESSION' && !$self->alphabet) {
 446                 if ($data =~ m{^(P|R)F}) {
 447                     $self->alphabet($1 eq 'R' ? 'rna' : $1 eq 'P' ? 'protein' : undef );
 448                 }
 449             }
 450         } else {
 451             $name = ($primary_tag eq 'GR') ? 'NAMED_META' :
 452                     ($primary_tag eq 'GC') ? 'CONSENSUS_META' :
 453                     'CUSTOM';
 454             ($secondary_tag, $isa_primary) = ('DATA', 1);
 455         }
 456
 457         # Since we can't determine whether data should be passed into the
 458         # Handler until the next round (due to concatenation and combining
 459         # data), we always check for the presence of the last chunk when the
 460         # occasion calls for it (i.e. when the current data string needs to go
 461         # into a new data chunk). If the data needs to be concatenated it is
 462         # flagged above and checked below (and passed by if the conditions
 463         # warrant it).
 464
 465         # We run into a bit of a fencepost problem, (one chunk left over at
 466         # the end); that is taken care of above when the end of the record is
 467         # found.
 468
 469         if ($isa_primary && defined $data_chunk && !$concat) {
 470             $handler->data_handler($data_chunk);
 471             undef $data_chunk;
 472         }
 473         $data_chunk->{NAME} = $name;       # used for the handler
 474         $data_chunk->{ALIGNMENT} = $align; # flag that determines chunk destination
 475         $data_chunk->{$secondary_tag} .= (defined($data_chunk->{$secondary_tag})) ?
 476             ' '.$data : $data;
 477         $data_chunk->{NSE} = $nse if $nse;
 478         if ($name eq 'SEQUENCE' || $name eq 'NAMED_META' || $name eq 'CONSENSUS_META') {
 479             $data_chunk->{BLOCK_LINE} = $self->{block_line};
 480             $data_chunk->{META_TAG} = $feat if ($name ne 'SEQUENCE');
 481         }
 482         $last_feat = $feat;
 483     }
 484
 485     my $aln = $handler->build_alignment;
 486     $handler->reset_parameters;
 487     return $aln;
 488 }
 489
 490 =head2 write_aln
 491
 492  Title   : write_aln
 493  Usage   : $stream->write_aln(@aln)
 494  Function: writes the $aln object into the stream in stockholm format
 495  Returns : 1 for success and 0 for error
 496  Args    : L<Bio::Align::AlignI> object
 497
 498 =cut
 499
 500 {
 501     my %LINK_CB = (
 502         'PDB' => sub {join('; ',($_[0]->database,
 503                                  $_[0]->primary_id.' '.
 504                                  ($_[0]->optional_id || ''),
 505                                  $_[0]->start,
 506                                  $_[0]->end)).';'},
 507         'SCOP' => sub {join('; ',($_[0]->database,
 508                                  $_[0]->primary_id || '',
 509                                  $_[0]->optional_id)).';'},
 510         '_DEFAULT_' => sub {join('; ',($_[0]->database,
 511                                  $_[0]->primary_id)).';'},
 512     );
 513
 514 sub write_aln {
 515     # enable array of SimpleAlign objects as well (see clustalw write_aln())
 516     my ($self, @aln) = @_;
 517     for my $aln (@aln) {
 518     $self->throw('Need Bio::Align::AlignI object')
 519           if (!$aln || !($aln->isa('Bio::Align::AlignI')));
 520
 521     my $coll = $aln->annotation;
 522     my ($aln_ann, $seq_ann) =
 523        ('#=GF ', '#=GS ');
 524     $self->_print("# $STKVERSION\n") || return 0;
 525     $self->spaces && $self->_print("\n");
 526     # annotations first
 527
 528     #=GF XX ....
 529     for my $param (@WRITEORDER) {
 530         my @anns;
 531         # no point in going through this if there is no annotation!
 532         last if !$coll;
 533         # alignment annotations
 534         my $ct = 1;
 535         $self->throw("Bad parameter: $param") if !exists $WRITEMAP{$param};
 536         # get the data, act on it based on the tag
 537         my ($tag, $key) = split q(/), $WRITEMAP{$param};
 538         if ($key eq 'Method') {
 539             push @anns, $aln->$param;
 540         } else {
 541             @anns = $coll->get_Annotations($param);
 542         }
 543         my $rn = 1;
 544         ANNOTATIONS:
 545         for my $ann (@anns) {
 546             # using Text::Wrap::wrap() for word wrap
 547             my ($text, $alntag, $data);
 548             if ($tag eq 'RX') {
 549                 REFS:
 550                 for my $rkey (qw(ref_comment ref_number ref_pubmed
 551                               ref_title ref_authors ref_location)) {
 552                     my ($newtag, $method) = split q(/), $WRITEMAP{$rkey};
 553                     $alntag = sprintf('%-10s',$aln_ann.$newtag);
 554                     if ($rkey eq 'ref_number') {
 555                         $data = "[$rn]";
 556                     } else {
 557                         $data = $ann->$method;
 558                     }
 559                     next REFS unless $data;
 560                     $text = wrap($alntag, $alntag, $data);
 561                     $self->_print("$text\n") or return 0;
 562                 }
 563                 $rn++;
 564                 next ANNOTATIONS;
 565             }
 566             elsif ($tag eq 'XX') { # custom
 567                 my $newtag = $ann->tagname;
 568                 my $tmp = $aln_ann.$newtag;
 569                 $alntag = sprintf('%-*s',length($tmp) + 1, $tmp);
 570                 $data = $ann->display_text;
 571             }
 572             elsif ($tag eq 'SQ') {
 573                 # use the actual number, not the stored Annotation data
 574                 my $tmp = $aln_ann.$tag;
 575                 $alntag = sprintf('%-*s',length($tmp) + 1, $tmp);
 576                 $data = $aln->num_sequences;
 577             }
 578             elsif ($tag eq 'DR') {
 579                 my $tmp = $aln_ann.$tag;
 580                 $alntag = sprintf('%-*s',length($tmp) + 1, $tmp);
 581                 my $db = uc $ann->database;
 582                 my $cb = exists $LINK_CB{$db} ? $LINK_CB{$db} : $LINK_CB{_DEFAULT_};
 583                 $data = $ann->display_text($cb);
 584             }
 585             else {
 586                 my $tmp = $aln_ann.$tag;
 587                 $alntag = sprintf('%-*s',length($tmp) + 1, $tmp);
 588                 $data = ref $ann ? $ann->display_text : $ann;
 589             }
 590             next unless $data;
 591             $text = wrap($alntag, $alntag, $data);
 592             $self->_print("$text\n") || return 0;
 593         }
 594     }
 595
 596     #=GS <seq-id> AC xxxxxx
 597     my $tag = 'AC';
 598     for my $seq ($aln->each_seq) {
 599         if (my $acc = $seq->accession_number) {
 600             my $text = sprintf("%-4s%-22s%-3s%s\n",$seq_ann,
 601                                $aln->displayname($seq->get_nse), $tag, $acc);
 602             $self->_print($text) || return 0;
 603         }
 604     }
 605
 606     #=GS <seq-id> DR xxxxxx
 607     $tag = 'DR';
 608     for my $sf ($aln->get_SeqFeatures) {
 609         if (my @links = $sf->annotation->get_Annotations('dblink')) {
 610             for my $link (@links) {
 611                 my $db = uc $link->database;
 612                 my $cb = exists $LINK_CB{$db} ? $LINK_CB{$db} : $LINK_CB{_DEFAULT_};
 613                 my $text = sprintf("%-4s%-22s%-3s%s\n",$seq_ann,
 614                                    $aln->displayname($sf->entire_seq->get_nse),
 615                                    $tag,
 616                                    $link->display_text($cb));
 617                 $self->_print($text) || return 0;
 618             }
 619         }
 620     }
 621
 622     $self->spaces && $self->_print("\n");
 623     # now the sequences...
 624
 625     my $blocklen = $self->line_length;
 626     my $maxlen = $aln->maxdisplayname_length() + 3;
 627     my $metalen = $aln->max_metaname_length() || 0;
 628     if ($blocklen) {
 629         my $blockstart = 1;
 630         my $alnlen = $aln->length;
 631         while ($blockstart < $alnlen) {
 632             my $subaln = $aln->slice($blockstart, $blockstart+$blocklen-1 ,1);
 633             $self->_print_seqs($subaln,$maxlen,$metalen);
 634             $blockstart += $blocklen;
 635             $self->_print("\n") unless $blockstart >= $alnlen;
 636         }
 637     } else {
 638         $self->_print_seqs($aln,$maxlen,$metalen);
 639     }
 640
 641     $self->_print("//\n") || return 0;
 642     }
 643     $self->flush() if $self->_flush_on_write && defined $self->_fh;
 644
 645     return 1;
 646 }
 647
 648 }
 649
 650 =head2 line_length
 651
 652  Title   : line_length
 653  Usage   : $obj->line_length($newval)
 654  Function: Set the alignment output line length
 655  Returns : value of line_length
 656  Args    : newvalue (optional)
 657
 658 =cut
 659
 660 sub line_length {
 661     my ( $self, $value ) = @_;
 662     if ( defined $value ) {
 663         $self->{'_line_length'} = $value;
 664     }
 665     return $self->{'_line_length'};
 666 }
 667
 668 =head2 spaces
 669
 670  Title   : spaces
 671  Usage   : $obj->spaces(1)
 672  Function: Set the 'spaces' flag, which prints extra newlines between the
 673            header and the annotation and the annotation and the alignment
 674  Returns : sequence data type
 675  Args    : newvalue (optional)
 676
 677 =cut
 678
 679 sub spaces {
 680     my $self = shift;
 681     return $self->{'_spaces'} = shift if @_;
 682     return $self->{'_spaces'};
 683 };
 684
 685 =head2 alignhandler
 686
 687  Title   : alignhandler
 688  Usage   : $stream->alignhandler($handler)
 689  Function: Get/Set the Bio::HandlerBaseI object
 690  Returns : Bio::HandlerBaseI
 691  Args    : Bio::HandlerBaseI
 692
 693 =cut
 694
 695 sub alignhandler {
 696     my ($self, $handler) = @_;
 697     if ($handler) {
 698         $self->throw("Not a Bio::HandlerBaseI") unless
 699         ref($handler) && $handler->isa("Bio::HandlerBaseI");
 700         $self->{'_alignhandler'} = $handler;
 701     }
 702     return $self->{'_alignhandler'};
 703 }
 704
 705 ############# PRIVATE INIT/HANDLER METHODS #############
 706
 707 sub _print_seqs {
 708     my ($self, $aln, $maxlen, $metalen) = @_;
 709
 710     my ($seq_meta, $aln_meta) = ('#=GR','#=GC');
 711     # modified (significantly) from AlignIO::pfam
 712
 713     my ($namestr,$seq,$add);
 714
 715     # pad extra for meta lines
 716
 717     for $seq ( $aln->each_seq() ) {
 718         my ($s, $e, $str) = ($seq->start, $seq->end, $seq->strand);
 719         $namestr = $aln->displayname($seq->get_nse());
 720         $self->_print(sprintf("%-*s%s\n",$maxlen+$metalen,
 721                               $namestr,
 722                               $seq->seq())) || return 0;
 723         if ($seq->isa('Bio::Seq::MetaI')) {
 724             for my $mname ($seq->meta_names) {
 725                  $self->_print(sprintf("%-*s%s\n",$maxlen+$metalen,
 726                                        $seq_meta.' '.$namestr.' '.$mname,
 727                                        $seq->named_meta($mname))) || return 0;
 728             }
 729         }
 730     }
 731     # alignment consensus
 732     my $ameta = $aln->consensus_meta;
 733     if ($ameta) {
 734         for my $mname ($ameta->meta_names) {
 735             $self->_print(sprintf("%-*s%s\n",$maxlen+$metalen,
 736                                   $aln_meta.' '.$mname,
 737                                   $ameta->named_meta($mname))) || return 0;
 738         }
 739     }
 740 }
 741
 742 1;