branch-1-6/Bio/Structure/SecStr/STRIDE/Res.pm

   1 # $id $
   2 #
   3 # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm
   4 #
   5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   6 #
   7 # Cared for by Ed Green <ed@compbio.berkeley.edu>
   8 #
   9 # Copyright Univ. of California
  10 #
  11 # You may distribute this module under the same terms as perl itself
  12 #
  13 # POD documentation - main docs before the code
  14
  15 =head1 NAME
  16
  17 Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output
  18
  19 =head1 SYNOPSIS
  20
  21  my $stride_obj = Bio::Structure::SecStr::STRIDE::Res->new( '-file' => 'filename.stride' );
  22
  23  # or
  24
  25  my $stride_obj = Bio::Structure::SecStr::STRIDE::Res->new( '-fh' => \*STDOUT );
  26
  27  # Get secondary structure assignment for PDB residue 20 of chain A
  28  $sec_str = $stride_obj->resSecStr( '20:A' );
  29
  30  # same
  31  $sec_str = $stride_obj->resSecStr( 20, 'A' )
  32
  33 =head1 DESCRIPTION
  34
  35 STRIDE::Res is a module for objectifying STRIDE output.  STRIDE is a
  36 program (similar to DSSP) for assigning secondary structure to
  37 individual residues of a pdb structure file.
  38
  39     ( Knowledge-Based Protein Secondary Structure Assignment,
  40     PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) )
  41
  42 STRIDE is available here:
  43 http://webclu.bio.wzw.tum.de/stride/
  44
  45 Methods are then available for extracting all of the infomation
  46 present within the output or convenient subsets of it.
  47
  48 Although they are very similar in function, DSSP and STRIDE differ
  49 somewhat in output format.  Thes differences are reflected in the
  50 return value of some methods of these modules.  For example, both
  51 the STRIDE and DSSP parsers have resSecStr() methods for returning
  52 the secondary structure of a given residue.  However, the range of
  53 return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the
  54 range of values for STRIDE is ( H, G, I, E, B, b, T, and C ).  See
  55 individual methods for details.
  56
  57 The methods are roughly divided into 3 sections:
  58
  59   1.  Global features of this structure (PDB ID, total surface area,
  60       etc.).  These methods do not require an argument.
  61   2.  Residue specific features ( amino acid, secondary structure,
  62       solvent exposed surface area, etc. ).  These methods do require an
  63       arguement.  The argument is supposed to uniquely identify a
  64       residue described within the structure.  It can be of any of the
  65       following forms:
  66       ('#A:B') or ( #, 'A', 'B' )
  67         || |
  68         || - Chain ID (blank for single chain)
  69         |--- Insertion code for this residue.  Blank for most residues.
  70         |--- Numeric portion of residue ID.
  71
  72       (#)
  73        |
  74        --- Numeric portion of residue ID.  If there is only one chain and
  75            it has no ID AND there is no residue with an insertion code at this
  76            number, then this can uniquely specify a residue.
  77
  78       ('#:C') or ( #, 'C' )
  79         | |
  80         | -Chain ID
  81         ---Numeric portion of residue ID.
  82
  83      If a residue is incompletely specified then the first residue that
  84      fits the arguments is returned.  For example, if 19 is the argument
  85      and there are three chains, A, B, and C with a residue whose number
  86      is 19, then 19:A will be returned (assuming its listed first).
  87
  88      Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
  89      are not supported by these modules.
  90
  91  3.  Value-added methods.  Return values are not verbatem strings
  92      parsed from DSSP or STRIDE output.
  93
  94 =head1 FEEDBACK
  95
  96 =head2 MailingLists
  97
  98 UsUser feedback is an integral part of the evolution of this and other
  99 Bioperl modules. Send your comments and suggestions preferably to one
 100 of the Bioperl mailing lists.  Your participation is much appreciated.
 101
 102   bioperl-l@bioperl.org                  - General discussion
 103   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
 104
 105 =head2 Support
 106
 107 Please direct usage questions or support issues to the mailing list:
 108
 109 I<bioperl-l@bioperl.org>
 110
 111 rather than to the module maintainer directly. Many experienced and
 112 reponsive experts will be able look at the problem and quickly
 113 address it. Please include a thorough description of the problem
 114 with code and data examples if at all possible.
 115
 116 =head2 Reporting Bugs
 117
 118 Report bugs to the Bioperl bug tracking system to help us keep track
 119 the bugs and their resolution.  Bug reports can be submitted via the
 120 web:
 121
 122   http://bugzilla.open-bio.org/
 123
 124 =head1 AUTHOR - Ed Green
 125
 126 Email ed@compbio.berkeley.edu
 127
 128
 129 =head1 APPENDIX
 130
 131 The Rest of the documentation details each method.
 132 Internal methods are preceded with a _.
 133
 134
 135 =cut
 136
 137 package Bio::Structure::SecStr::STRIDE::Res;
 138 use strict;
 139 use Bio::Root::IO;
 140 use Bio::PrimarySeq;
 141
 142 use base qw(Bio::Root::Root);
 143
 144 our %ASGTable = ( 'aa'         =>  0,
 145                   'resNum'     =>  1,
 146                   'ssAbbr'     =>  2,
 147                   'ssName'     =>  3,
 148                   'phi'        =>  4,
 149                   'psi'        =>  5,
 150                   'surfArea'   =>  6 );
 151
 152 our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N',
 153                  'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q',
 154                  'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H',
 155                  'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K',
 156                  'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P',
 157                  'SER' => 'S', 'THR' => 'T', 'TRP' => 'W',
 158                  'TYR' => 'Y', 'VAL' => 'V' );
 159
 160 =head2 new
 161
 162  Title         : new
 163  Usage         : makes new object of this class
 164  Function      : Constructor
 165  Example       : $stride_obj = Bio::Structure::SecStr::STRIDE:Res->new( '-file' =>  filename
 166                                                      # or
 167                                                      '-fh'   => FILEHANDLE )
 168  Returns       : object (ref)
 169  Args          : filename or filehandle( must be proper STRIDE output )
 170
 171 =cut
 172
 173 sub new {
 174     my ( $class, @args ) = @_;
 175     my $self = $class->SUPER::new( @args );
 176     my $io   = Bio::Root::IO->new( @args );
 177     $self->_parse( $io ); # not passing filehandle !
 178     $io->close();
 179     return $self;
 180 }
 181
 182 # GLOBAL FEATURES / INFO / STATS
 183
 184 =head2 totSurfArea
 185
 186  Title         : totSurfArea
 187  Usage         : returns sum of surface areas of all residues of all
 188                  chains considered.  Result is memoized.
 189  Function      :
 190  Example       : $tot_SA = $stride_obj->totSurfArea();
 191  Returns       : scalar
 192  Args          : none
 193
 194
 195 =cut
 196
 197 sub totSurfArea {
 198     my $self = shift;
 199     my $total = 0;
 200     my ( $chain, $res );
 201
 202     if ( $self->{ 'SurfArea' } ) {
 203         return $self->{ 'SurfArea' };
 204     }
 205     else {
 206         foreach $chain ( keys %{$self->{ 'ASG' }} ) {
 207             for ( my $i = 1; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) {
 208                 $total +=
 209                     $self->{'ASG'}->{$chain}->[$i]->[$ASGTable{'surfArea'}];
 210             }
 211         }
 212     }
 213
 214     $self->{ 'SurfArea' } = $total;
 215     return $self->{ 'SurfArea' };
 216
 217 }
 218
 219 =head2 numResidues
 220
 221  Title         : numResidues
 222  Usage         : returns total number of residues in all chains or
 223                  just the specified chain
 224  Function      :
 225  Example       : $tot_res = $stride_obj->numResidues();
 226  Returns       : scalar int
 227  Args          : none or chain id
 228
 229
 230 =cut
 231
 232 sub numResidues {
 233     my $self = shift;
 234     my $chain = shift;
 235     my $total = 0;
 236     my $key;
 237     foreach $key ( keys %{$self->{ 'ASG' }} ) {
 238         if ( $chain ) {
 239             if ( $key eq $chain ) {
 240                 $total += $#{$self->{ 'ASG' }{ $key }};
 241             }
 242         }
 243         else {
 244             $total += $#{$self->{ 'ASG' }{ $key }};
 245         }
 246     }
 247     return $total;
 248 }
 249
 250 # STRAIGHT FROM THE PDB ENTRY
 251
 252 =head2 pdbID
 253
 254  Title         : pdbID
 255  Usage         : returns pdb identifier ( 1FJM, e.g. )
 256  Function      :
 257  Example       : $pdb_id = $stride_obj->pdbID();
 258  Returns       : scalar string
 259  Args          : none
 260
 261
 262 =cut
 263
 264 sub pdbID {
 265     my $self = shift;
 266     return $self->{ 'PDB' };
 267 }
 268 =head2 pdbAuthor
 269
 270  Title         : pdbAuthor
 271  Usage         : returns author of this PDB entry
 272  Function      :
 273  Example       : $auth = $stride_obj->pdbAuthor()
 274  Returns       : scalar string
 275  Args          : none
 276
 277
 278 =cut
 279
 280 sub pdbAuthor {
 281     my $self = shift;
 282     return join( ' ', @{ $self->{ 'HEAD' }->{ 'AUT' } } );
 283 }
 284
 285 =head2 pdbCompound
 286
 287  Title         : pdbCompound
 288  Usage         : returns string of what was found on the
 289                  CMP lines
 290  Function      :
 291  Example       : $cmp = $stride_obj->pdbCompound();
 292  Returns       : string
 293  Args          : none
 294
 295
 296 =cut
 297
 298 sub pdbCompound {
 299     my $self = shift;
 300     return join( ' ', @{ $self->{ 'HEAD' }->{ 'CMP' } } );
 301 }
 302
 303 =head2 pdbDate
 304
 305  Title         : pdbDate
 306  Usage         : returns date given in PDB file
 307  Function      :
 308  Example       : $pdb_date = $stride_obj->pdbDate();
 309  Returns       : scalar
 310  Args          : none
 311
 312
 313 =cut
 314
 315 sub pdbDate {
 316     my $self = shift;
 317     return $self->{ 'DATE' };
 318 }
 319
 320 =head2 pdbHeader
 321
 322  Title         : pdbHeader
 323  Usage         : returns string of characters found on the PDB header line
 324  Function      :
 325  Example       : $head = $stride_obj->pdbHeader();
 326  Returns       : scalar
 327  Args          : none
 328
 329
 330 =cut
 331
 332 sub pdbHeader {
 333     my $self = shift;
 334     return $self->{ 'HEAD' }->{ 'HEADER' };
 335 }
 336
 337 =head2 pdbSource
 338
 339  Title         : pdbSource
 340  Usage         : returns string of what was found on SRC lines
 341  Function      :
 342  Example       : $src = $stride_obj->pdbSource();
 343  Returns       : scalar
 344  Args          : none
 345
 346
 347 =cut
 348
 349 sub pdbSource {
 350     my $self = shift;
 351     return join( ' ', @{ $self->{ 'HEAD' }->{ 'SRC' } } );
 352 }
 353
 354 # RESIDUE SPECIFIC ACCESSORS
 355
 356 =head2 resAA
 357
 358  Title         : resAA
 359  Usage         : returns 1 letter abbr. of the amino acid specified by
 360                  the arguments
 361  Function      :
 362  Examples      : $aa = $stride_obj->resAA( RESIDUE_ID );
 363  Returns       : scalar character
 364  Args          : RESIDUE_ID
 365
 366
 367 =cut
 368
 369 sub resAA {
 370     my $self = shift;
 371     my @args = @_;
 372     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 373     return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} );
 374 }
 375
 376 =head2 resPhi
 377
 378  Title         : resPhi
 379  Usage         : returns phi angle of specified residue
 380  Function      :
 381  Example       : $phi = $stride_obj->resPhi( RESIDUE_ID );
 382  Returns       : scaler
 383  Args          : RESIDUE_ID
 384
 385
 386 =cut
 387
 388 sub resPhi {
 389     my $self = shift;
 390     my @args = @_;
 391     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 392     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'phi' } ];
 393 }
 394
 395 =head2 resPsi
 396
 397  Title         : resPsi
 398  Usage         : returns psi angle of specified residue
 399  Function      :
 400  Example       : $psi = $stride_obj->resPsi( RESIDUE_ID );
 401  Returns       : scalar
 402  Args          : RESIDUE_ID
 403
 404
 405 =cut
 406
 407 sub resPsi {
 408     my $self = shift;
 409     my @args = @_;
 410     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 411     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'psi' } ];
 412 }
 413
 414 =head2 resSolvAcc
 415
 416  Title         : resSolvAcc
 417  Usage         : returns stride calculated surface area of specified residue
 418  Function      :
 419  Example       : $sa = $stride_obj->resSolvAcc( RESIDUE_ID );
 420  Returns       : scalar
 421  Args          : RESIDUE_ID
 422
 423
 424 =cut
 425
 426 sub resSolvAcc {
 427     my $self = shift;
 428     my @args = @_;
 429     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 430     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ];
 431 }
 432
 433 =head2 resSurfArea
 434
 435  Title         : resSurfArea
 436  Usage         : returns stride calculated surface area of specified residue
 437  Function      :
 438  Example       : $sa = $stride_obj->resSurfArea( RESIDUE_ID );
 439  Returns       : scalar
 440  Args          : RESIDUE_ID
 441
 442
 443 =cut
 444
 445 sub resSurfArea {
 446     my $self = shift;
 447     my @args = @_;
 448     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 449     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ];
 450 }
 451
 452 =head2 resSecStr
 453
 454  Title         : resSecStr
 455  Usage         : gives one letter abbr. of stride determined secondary
 456                  structure of specified residue
 457  Function      :
 458  Example       : $ss = $stride_obj->resSecStr( RESIDUE_ID );
 459  Returns       : one of: 'H' => Alpha Helix
 460                          'G' => 3-10 helix
 461                          'I' => PI-helix
 462                          'E' => Extended conformation
 463                          'B' or 'b' => Isolated bridge
 464                          'T' => Turn
 465                          'C' => Coil
 466                          ' ' => None
 467                 # NOTE:  This range is slightly DIFFERENT from the
 468                 #        DSSP method of the same name
 469  Args          : RESIDUE_ID
 470
 471
 472 =cut
 473
 474 sub resSecStr {
 475     my $self = shift;
 476     my @args = @_;
 477     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 478     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssAbbr' } ];
 479 }
 480
 481 =head2 resSecStrSum
 482
 483  Title         : resSecStrSum
 484  Usage         : gives one letter summary of secondary structure of
 485                  specified residue.  More general than secStruc()
 486  Function      :
 487  Example       : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID );
 488  Returns       : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil)
 489  Args          : residue identifier(s) ( SEE INTRO NOTE )
 490
 491
 492 =cut
 493
 494 sub resSecStrSum {
 495     my $self = shift;
 496     my @args = @_;
 497     my $ss_char = $self->resSecStr( @args );
 498
 499     if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) {
 500         return 'H';
 501     }
 502     if ( $ss_char eq 'E' || $ss_char eq 'B' || $ss_char eq 'b' ) {
 503         return 'B';
 504     }
 505     if ( $ss_char eq 'T' ) {
 506         return 'T';
 507     }
 508     else {
 509         return 'C';
 510     }
 511 }
 512
 513 # STRIDE SPECIFIC
 514
 515 =head2 resSecStrName
 516
 517  Title         : resSecStrName
 518  Usage         : gives full name of the secondary structural element
 519                  classification of the specified residue
 520  Function      :
 521  Example       : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID );
 522  Returns       : scalar string
 523  Args          : RESIDUE_ID
 524
 525
 526 =cut
 527
 528 sub resSecStrName {
 529     my $self = shift;
 530     my @args = @_;
 531     my ( $ord, $chain ) = $self->_toOrdChain( @args );
 532     return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssName' } ];
 533 }
 534
 535 =head2 strideLocs
 536
 537  Title         : strideLocs
 538  Usage         : returns stride determined contiguous secondary
 539     structural elements as specified on the LOC lines
 540  Function      :
 541  Example       : $loc_pnt = $stride_obj->strideLocs();
 542  Returns       : pointer to array of 5 element arrays.
 543     0 => stride name of structural element
 544     1 => first residue pdb key (including insertion code, if app.)
 545     2 => first residue chain id
 546     3 => last residue pdb key (including insertion code, if app.)
 547     4 => last residue chain id
 548     NOTE the differences between this range and the range of SecBounds()
 549  Args          : none
 550
 551
 552 =cut
 553
 554 sub strideLocs {
 555     my $self = shift;
 556     return $self->{ 'LOC' };
 557 }
 558
 559 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
 560
 561 =head2 secBounds
 562
 563  Title         : secBounds
 564  Usage         : gets residue ids of boundary residues in each
 565                  contiguous secondary structural element of specified
 566                  chain
 567  Function      :
 568  Example       : $ss_bound_pnt = $stride_obj->secBounds( 'A' );
 569  Returns       : pointer to array of 3 element arrays.  First two elements
 570                  are the PDB IDs of the start and end points, respectively
 571                  and inclusively.  The last element is the STRIDE secondary
 572                  structural element code (same range as resSecStr).
 573  Args          : chain identifier ( one character ).  If none, '-' is assumed
 574
 575
 576 =cut
 577
 578 sub secBounds {
 579     # Requires a chain name.  If left blank, we assume ' ' which equals '-'
 580     my $self  = shift;
 581     my $chain = shift;
 582     my @SecBounds;
 583
 584     $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' );
 585
 586     # if we've memoized this one, use that
 587     if ( $self->{ 'SecBounds' }->{ $chain } ) {
 588         return $self->{ 'SecBounds' }->{ $chain };
 589     }
 590
 591     #check to make sure chain is valid
 592     if ( !( $self->{ 'ASG' }->{ $chain } ) ) {
 593         $self->throw( "No such chain: $chain\n" );
 594     }
 595
 596     my $cur_element = $self->{ 'ASG' }->{ $chain }->[ 1 ]->
 597         [ $ASGTable{ 'ssAbbr' } ];
 598     my $beg = 1;
 599     my $i;
 600
 601     for ( $i = 2; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) {
 602         if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ]
 603              ne $cur_element ) {
 604             push( @SecBounds, [ $beg, $i -1 , $cur_element ] );
 605             $beg = $i;
 606             $cur_element = $self->{ 'ASG' }->{ $chain }->[ $i ]->
 607                 [ $ASGTable{ 'ssAbbr' } ];
 608         }
 609     }
 610
 611     if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ]
 612          eq $cur_element ) {
 613         push( @SecBounds, [ $beg, $i, $cur_element ] );
 614     }
 615     else {
 616         push( @SecBounds, [ $beg, $i - 1, $cur_element ],
 617               [ $i, $i, $self->{ 'ASG' }->{ $chain }->[ $i ]->
 618                 [ $ASGTable{ 'ssAbbr' } ] ] );
 619     }
 620
 621     $self->{ 'SecBounds' }->{ $chain } = \@SecBounds;
 622     return $self->{ 'SecBounds' }->{ $chain };
 623 }
 624
 625 =head2 chains
 626
 627  Title         : chains
 628  Usage         : gives array chain I.D.s (characters)
 629  Function      :
 630  Example       : @chains = $stride_obj->chains();
 631  Returns       : array of characters
 632  Args          : none
 633
 634
 635 =cut
 636
 637 sub chains {
 638     my $self = shift;
 639     my @chains = keys ( %{ $self->{ 'ASG' } } );
 640     return \@chains;
 641 }
 642
 643 =head2 getSeq
 644
 645  Title         : getSeq
 646  Usage         : returns a Bio::PrimarySeq object which represents an
 647                  approximation at the sequence of the specified chain.
 648  Function      : For most chain of most entries, the sequence returned by
 649                  this method will be very good.  However, it it inherently
 650                  unsafe to rely on STRIDE to extract sequence information about
 651                  a PDB entry.  More reliable information can be obtained from
 652                  the PDB entry itself.  If a second option is given
 653                  (and evaluates to true), the sequence generated will
 654                  have 'X' in spaces where the pdb residue numbers are
 655                  discontinuous.  In some cases this results in a
 656                  better sequence object (when the  discontinuity is
 657                  due to regions which were present, but could not be
 658                  resolved).  In other cases, it will result in a WORSE
 659                  sequence object (when the discontinuity is due to
 660                  historical sequence numbering and all sequence is
 661                  actually resolved).
 662  Example       : $pso = $dssp_obj->getSeq( 'A' );
 663  Returns       : (pointer to) a PrimarySeq object
 664  Args          : Chain identifier.  If none given, '-' is assumed.
 665
 666
 667 =cut
 668
 669 sub getSeq {
 670     my $self    = shift;
 671     my $chain   = shift;
 672     my $fill_in = shift;
 673
 674     if ( !( $chain ) ) {
 675         $chain = '-';
 676     }
 677
 678     if ( $self->{ 'Seq' }->{ $chain } ) {
 679         return $self->{ 'Seq' }->{ $chain };
 680     }
 681
 682     my ( $seq,
 683          $num_res,
 684          $last_res_num,
 685          $cur_res_num,
 686          $i,
 687          $step,
 688          $id
 689          );
 690
 691     $seq = "";
 692     $num_res = $self->numResidues( $chain );
 693     $last_res_num = $self->_pdbNum( 1, $chain );
 694     for ( $i = 1; $i <= $num_res; $i++ ) {
 695         if ( $fill_in ) {
 696             $cur_res_num = $self->_pdbNum( $i, $chain );
 697             $step = $cur_res_num - $last_res_num;
 698             if ( $step > 1 ) {
 699                 $seq .= 'X' x ( $step - 1 );
 700             }
 701         }
 702         $seq .= $self->_resAA( $i, $chain );
 703         $last_res_num = $cur_res_num;
 704     }
 705
 706     $id = $self->pdbID();
 707     $id .= "$chain";
 708
 709     $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq,
 710                                                          -id  => $id,
 711                                                          -moltype => 'protein'
 712                                                          );
 713
 714     return $self->{ 'Seq' }->{ $chain };
 715 }
 716
 717 =head1 INTERNAL METHODS
 718
 719 =head2 _pdbNum
 720
 721  Title        : _pdbNum
 722  Usage        : fetches the numeric portion of the identifier for a given
 723                 residue as reported by the pdb entry.  Note, this DOES NOT
 724                 uniquely specify a residue.  There may be an insertion code
 725                 and/or chain identifier differences.
 726  Function     :
 727  Example      : $pdbNum = $self->pdbNum( 3, 'A' );
 728  Returns      : a scalar
 729  Args         : valid ordinal num / chain combination
 730
 731
 732 =cut
 733
 734 sub _pdbNum {
 735     my $self  = shift;
 736     my $ord   = shift;
 737     my $chain = shift;
 738     if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
 739         $self->throw( "No such ordinal $ord in chain $chain.\n" );
 740     }
 741     my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
 742     my $num_part;
 743     ( $num_part ) = ( $pdb_junk =~ /(-*\d+).*/ );
 744     return $num_part;
 745 }
 746
 747 =head2 _resAA
 748
 749  Title         : _resAA
 750  Usage         : returns 1 letter abbr. of the amino acid specified by
 751                  the arguments
 752  Function      :
 753  Examples      : $aa = $stride_obj->_resAA( 3, '-' );
 754  Returns       : scalar character
 755  Args          : ( ord. num, chain )
 756
 757
 758 =cut
 759
 760 sub _resAA {
 761     my $self  = shift;
 762     my $ord   = shift;
 763     my $chain = shift;
 764     if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
 765         $self->throw( "No such ordinal $ord in chain $chain.\n" );
 766     }
 767     return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} );
 768 }
 769
 770 =head2 _pdbInsCo
 771
 772  Title        : _pdbInsCo
 773  Usage        : fetches the Insertion code for this residue.
 774  Function     :
 775  Example      : $pdb_ins_co = $self->_pdb_ins_co( 15, 'B' );
 776  Returns      : a scalar
 777  Args         : ordinal number and chain
 778
 779
 780 =cut
 781
 782 sub _pdbInsCo {
 783     my $self  = shift;
 784     my $ord   = shift;
 785     my $chain = shift;
 786     if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
 787         $self->throw( "No such ordinal $ord in chain $chain.\n" );
 788     }
 789     my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
 790     my $letter_part;
 791     ( $letter_part ) = ( $pdb_junk =~ /\d+(\D+)/ ); # insertion code can be any
 792                                                     # non-word character(s)
 793     return $letter_part;
 794 }
 795
 796 =head2 _toOrdChain
 797
 798  Title         : _toOrdChain
 799  Usage         : takes any set of residue identifying parameters and
 800     wrestles them into a two element array:  the chain and the ordinal
 801     number of this residue.  This two element array can then be
 802     efficiently used as keys in many of the above accessor methods
 803  ('#A:B') or ( #, 'A', 'B' )
 804    || |
 805    || - Chain ID (blank for single chain)
 806    |--- Insertion code for this residue.  Blank for most residues.
 807    |--- Numeric portion of residue ID.
 808
 809   (#)
 810    |
 811    --- Numeric portion of residue ID.  If there is only one chain and
 812    it has no ID AND there is no residue with an insertion code at this
 813    number, then this can uniquely specify a residue.
 814
 815   #  ('#:C) or ( #, 'C' )
 816        | |
 817        | -Chain ID
 818        ---Numeric portion of residue ID.
 819
 820   If a residue is incompletely specified then the first residue that
 821   fits the arguments is returned.  For example, if 19 is the argument
 822   and there are three chains, A, B, and C with a residue whose number
 823   is 19, then 19:A will be returned (assuming its listed first).
 824
 825  Function      :
 826  Example       : my ( $ord, $chain ) = $self->_toOrdChain( @args );
 827  Returns       : two element array
 828  Args          : valid set of residue identifier(s) ( SEE NOTE ABOVE )
 829
 830
 831 =cut
 832
 833 sub _toOrdChain {
 834     my $self = shift;
 835     my $arg_str;
 836
 837     my ( $key_num, $chain_id, $ins_code, $key, $i );
 838
 839     # check to see how many args are given
 840     if ( $#_ >= 1 ) { # multiple args
 841         $key_num = shift;
 842         if ( $#_ >= 1 ) { # still multiple args => ins. code, too
 843             $ins_code = shift;
 844             $chain_id = shift;
 845         }
 846         else { # just one more arg. => chain_id
 847             $chain_id = shift;
 848         }
 849     }
 850     else { # only single arg.  Might be number or string
 851         $arg_str = shift;
 852         if ( $arg_str =~ /:/ ) {
 853             # a chain is specified
 854             ( $chain_id ) = ( $arg_str =~ /:(.)/);
 855             $arg_str =~ s/:.//;
 856         }
 857         if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
 858             # an insertion code is specified
 859             ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
 860             $arg_str =~ s/[A-Z]|[a-z]//g;
 861         }
 862         #now, get the number bit-> everything still around
 863         $key_num = $arg_str;
 864     }
 865
 866     $key = "$key_num$ins_code";
 867     if ( !( $chain_id ) || $chain_id eq ' ' ) {
 868         $chain_id = '-';
 869     }
 870
 871     if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) {
 872         $self->throw( "No such chain: $chain_id" );
 873     }
 874
 875     for ( $i = 1; $i <= $#{$self->{ 'ASG' }->{ $chain_id }}; $i++ ) {
 876         if ( $self->{ 'ASG' }->{ $chain_id }->[ $i ]->[ $ASGTable{ 'resNum' } ] eq
 877              $key ) {
 878             return ( $i, $chain_id );
 879         }
 880     }
 881
 882     $self->throw( "No such key: $key" );
 883
 884 }
 885
 886 =head2 _parse
 887
 888  Title         : _parse
 889  Usage         : as name suggests, parses stride output, creating object
 890  Function      :
 891  Example       : $self->_parse( $io );
 892  Returns       :
 893  Args          : valid Bio::Root::IO object
 894
 895
 896 =cut
 897
 898 sub _parse {
 899     my $self = shift;
 900     my $io = shift;
 901     my $file = $io->_fh();
 902
 903     # Parse top lines
 904         if ( $self->_parseTop( $io ) ) {
 905         $self->throw( "Not stride output" );
 906     }
 907
 908     # Parse the HDR, CMP, SCR, and AUT lines
 909     $self->_parseHead( $io );
 910
 911     # Parse the CHN, SEQ, STR, and LOC  lines
 912     $self->_parseSummary( $io ); # we're ignoring this
 913
 914     # Parse the ASG lines
 915     $self->_parseASG( $io );
 916 }
 917
 918 =head2 _parseTop
 919
 920  Title         : _parseTop
 921  Usage         : makes sure this looks like stride output
 922  Function      :
 923  Example       :
 924  Returns       :
 925  Args          :
 926
 927
 928 =cut
 929
 930 sub _parseTop {
 931     my $self = shift;
 932     my $io = shift;
 933     my $file = $io->_fh();
 934     my $cur = <$file>;
 935     if ( $cur =~ /^REM  ---/ ) {
 936         return 0;
 937     }
 938     return 1;
 939 }
 940
 941 =head2 _parseHead
 942
 943  Title         : _parseHead
 944  Usage         : parses
 945  Function      : HDR, CMP, SRC, and AUT lines
 946  Example       :
 947  Returns       :
 948  Args          :
 949
 950
 951 =cut
 952
 953 sub _parseHead {
 954     my $self = shift;
 955     my $io = shift;
 956     my $file = $io->_fh();
 957     my $cur;
 958     my $element;
 959     my ( @elements, @cmp, @src, @aut );
 960     my %head = {};
 961     my $still_head = 1;
 962
 963     $cur = <$file>;
 964     while ( $cur =~ /^REM / ) {
 965         $cur = <$file>;
 966     }
 967
 968     if ( $cur =~ /^HDR / ) {
 969         @elements = split( /\s+/, $cur );
 970         shift( @elements );
 971         pop( @elements );
 972         $self->{ 'PDB' }  = pop( @elements );
 973         $self->{ 'DATE' } = pop( @elements );
 974         # now, everything else is "header" except for the word
 975         # HDR
 976         $element = join( ' ', @elements );
 977         $head{ 'HEADER' } = $element;
 978     }
 979
 980     $cur = <$file>;
 981     while ( $cur =~ /^CMP / ) {
 982         ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ );
 983         push( @cmp, $cur );
 984         $cur = <$file>;
 985     }
 986
 987     while ( $cur =~ /^SRC / ) {
 988         ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ );
 989         push( @src, $cur );
 990         $cur = <$file>;
 991     }
 992
 993     while ( $cur =~ /^AUT / ) {
 994         ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ );
 995         push( @aut, $cur );
 996         $cur = <$file>;
 997     }
 998
 999     $head{ 'CMP' } = \@cmp;
1000     $head{ 'SRC' } = \@src;
1001     $head{ 'AUT' } = \@aut;
1002     $self->{ 'HEAD' } = \%head;
1003 }
1004
1005 =head2 _parseSummary
1006
1007  Title         : _parseSummary
1008  Usage         : parses LOC lines
1009  Function      :
1010  Example       :
1011  Returns       :
1012  Args          :
1013
1014
1015 =cut
1016
1017 sub _parseSummary {
1018     my $self = shift;
1019     my $io = shift;
1020     my $file = $io->_fh();
1021     my $cur = <$file>;
1022     my $bound_set;
1023     my $element;
1024     my ( @elements, @cur );
1025     my @LOC_lookup = ( [ 5,  12 ],   # Element name
1026         # reduntdant           [ 18, 3 ],    # First residue name
1027                        [ 22, 5 ],    # First residue PDB number
1028                        [ 28, 1 ],    # First residue Chain ID
1029         # redundant            [ 35, 3 ],    # Last residue name
1030                        [ 40, 5 ],    # Last residue PDB number
1031                        [ 46, 1 ]  ); # Last residue Chain ID
1032
1033     #ignore these lines
1034     while ( $cur =~ /^REM |^STR |^SEQ |^CHN / ) {
1035         $cur = <$file>;
1036     }
1037
1038     while ( $cur =~ /^LOC / ) {
1039         foreach $bound_set ( @LOC_lookup ) {
1040             $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] );
1041             $element =~ s/\s//g;
1042             push( @cur, $element );
1043         }
1044         push( @elements, [ @cur ] );
1045         $cur = <$file>;
1046         @cur = ();
1047     }
1048     $self->{ 'LOC' } = \@elements;
1049
1050 }
1051
1052 =head2 _parseASG
1053
1054  Title         : _parseASG
1055  Usage         : parses ASG lines
1056  Function      :
1057  Example       :
1058  Returns       :
1059  Args          :
1060
1061
1062 =cut
1063
1064 sub _parseASG {
1065     my $self = shift;
1066     my $io = shift;
1067     my $file = $io->_fh();
1068     my $cur = <$file>;
1069     my $bound_set;
1070     my $ord_num;
1071     my ( $chain, $last_chain );
1072     my $element;
1073     my %ASG;
1074     my ( @cur, @elements );
1075     my @ASG_lookup = ( [ 5,  3 ],  # Residue name
1076                   #    [ 9,  1 ],  # Chain ID
1077                        [ 10, 5 ],  # PDB residue number (w/ins.code)
1078                   #    [ 16, 4 ],  # ordinal stride number
1079                        [ 24, 1 ],  # one letter sec. stru. abbr.
1080                        [ 26, 13],  # full sec. stru. name
1081                        [ 42, 7 ],  # phi angle
1082                        [ 52, 7 ],  # psi angle
1083                        [ 64, 5 ] );# residue solv. acc.
1084
1085     while ( $cur =~ /^REM / ) {
1086         $cur = <$file>;
1087     }
1088
1089     while ( $cur =~ /^ASG / ) {
1090         # get ordinal number for array key
1091         $ord_num = substr( $cur, 16, 4 );
1092         $ord_num =~ s/\s//g;
1093
1094         # get the chain id
1095         $chain = substr( $cur, 9, 1 );
1096
1097         if ( $last_chain && ( $chain ne $last_chain ) ) {
1098             $ASG{ $last_chain } = [ @elements ];
1099             @elements = ();
1100         }
1101
1102         # now get the rest of the info on this line
1103         foreach $bound_set ( @ASG_lookup ) {
1104             $element = substr( $cur, $bound_set->[ 0 ],
1105                                $bound_set->[ 1 ] );
1106             $element =~ s/\s//g;
1107             push( @cur, $element );
1108         }
1109         $elements[ $ord_num ] = [ @cur ];
1110         $cur = <$file>;
1111         @cur = ();
1112         $last_chain = $chain;
1113     }
1114
1115     $ASG{ $chain } = [ @elements ];
1116
1117     $self->{ 'ASG' } = \%ASG;
1118 }
1119
1120 1;
1121
1122
1123