Bio/PrimarySeqI.pm

   1 # $Id$
   2 #
   3 # BioPerl module for Bio::PrimarySeqI
   4 #
   5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
   6 #
   7 # Copyright Ewan Birney
   8 #
   9 # You may distribute this module under the same terms as perl itself
  10
  11 # POD documentation - main docs before the code
  12
  13 =head1 NAME
  14
  15 Bio::PrimarySeqI - Interface definition for a Bio::PrimarySeq
  16
  17 =head1 SYNOPSIS
  18
  19     # Bio::PrimarySeqI is the interface class for sequences.
  20     # If you are a newcomer to bioperl, you might want to start with
  21     # Bio::Seq documentation.
  22
  23     # Test if this is a seq object
  24     $obj->isa("Bio::PrimarySeqI") ||
  25       $obj->throw("$obj does not implement the Bio::PrimarySeqI interface");
  26
  27     # Accessors
  28     $string    = $obj->seq();
  29     $substring = $obj->subseq(12,50);
  30     $display   = $obj->display_id();       # for human display
  31     $id        = $obj->primary_id();       # unique id for this object,
  32                                            # implementation defined
  33     $unique_key= $obj->accession_number(); # unique biological id
  34
  35
  36     # Object manipulation
  37     eval {
  38            $rev = $obj->revcom();
  39     };
  40     if( $@ ) {
  41            $obj->throw("Could not reverse complement. ".
  42                     "Probably not DNA. Actual exception\n$@\n");
  43     }
  44
  45     $trunc = $obj->trunc(12,50);
  46     # $rev and $trunc are Bio::PrimarySeqI compliant objects
  47
  48
  49 =head1 DESCRIPTION
  50
  51 This object defines an abstract interface to basic sequence
  52 information - for most users of the package the documentation (and
  53 methods) in this class are not useful - this is a developers-only
  54 class which defines what methods have to be implmented by other Perl
  55 objects to comply to the Bio::PrimarySeqI interface. Go "perldoc
  56 Bio::Seq" or "man Bio::Seq" for more information on the main class for
  57 sequences.
  58
  59 PrimarySeq is an object just for the sequence and its name(s), nothing
  60 more. Seq is the larger object complete with features. There is a pure
  61 perl implementation of this in L<Bio::PrimarySeq>. If you just want to
  62 use L<Bio::PrimarySeq> objects, then please read that module first. This
  63 module defines the interface, and is of more interest to people who
  64 want to wrap their own Perl Objects/RDBs/FileSystems etc in way that
  65 they "are" bioperl sequence objects, even though it is not using Perl
  66 to store the sequence etc.
  67
  68 This interface defines what bioperl considers necessary to "be" a
  69 sequence, without providing an implementation of this, an
  70 implementation is provided in L<Bio::PrimarySeq>. If you want to provide
  71 a Bio::PrimarySeq-compliant object which in fact wraps another
  72 object/database/out-of-perl experience, then this is the correct thing
  73 to wrap, generally by providing a wrapper class which would inherit
  74 from your object and this Bio::PrimarySeqI interface. The wrapper class
  75 then would have methods lists in the "Implementation Specific
  76 Functions" which would provide these methods for your object.
  77
  78 =head1 FEEDBACK
  79
  80 =head2 Mailing Lists
  81
  82 User feedback is an integral part of the evolution of this and other
  83 Bioperl modules. Send your comments and suggestions preferably to one
  84 of the Bioperl mailing lists.  Your participation is much appreciated.
  85
  86   bioperl-l@bioperl.org                  - General discussion
  87   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  88
  89 =head2 Reporting Bugs
  90
  91 Report bugs to the Bioperl bug tracking system to help us keep track
  92 the bugs and their resolution.  Bug reports can be submitted via the
  93 web:
  94
  95   http://bugzilla.open-bio.org/
  96
  97 =head1 AUTHOR - Ewan Birney
  98
  99 Email birney@ebi.ac.uk
 100
 101 =head1 APPENDIX
 102
 103 The rest of the documentation details each of the object
 104 methods. Internal methods are usually preceded with a _
 105
 106 =cut
 107
 108
 109 # Let the code begin...
 110
 111
 112 package Bio::PrimarySeqI;
 113 use strict;
 114 use Bio::Tools::CodonTable;
 115
 116 use base qw(Bio::Root::RootI);
 117
 118 =head1 Implementation Specific Functions
 119
 120 These functions are the ones that a specific implementation must
 121 define.
 122
 123 =head2 seq
 124
 125  Title   : seq
 126  Usage   : $string = $obj->seq()
 127  Function: Returns the sequence as a string of letters. The
 128            case of the letters is left up to the implementer.
 129            Suggested cases are upper case for proteins and lower case for
 130            DNA sequence (IUPAC standard), but implementations are suggested to
 131            keep an open mind about case (some users... want mixed case!)
 132  Returns : A scalar
 133  Status  : Virtual
 134
 135 =cut
 136
 137 sub seq {
 138    my ($self) = @_;
 139    $self->throw_not_implemented();
 140 }
 141
 142 =head2 subseq
 143
 144  Title   : subseq
 145  Usage   : $substring = $obj->subseq(10,40);
 146  Function: Returns the subseq from start to end, where the first base
 147            is 1 and the number is inclusive, i.e. 1-2 are the first two
 148            bases of the sequence.
 149
 150            Start cannot be larger than end but can be equal.
 151
 152  Returns : A string
 153  Args    :
 154  Status  : Virtual
 155
 156 =cut
 157
 158 sub subseq{
 159    my ($self) = @_;
 160    $self->throw_not_implemented();
 161 }
 162
 163 =head2 display_id
 164
 165  Title   : display_id
 166  Usage   : $id_string = $obj->display_id();
 167  Function: Returns the display id, also known as the common name of the Sequence
 168            object.
 169
 170            The semantics of this is that it is the most likely string
 171            to be used as an identifier of the sequence, and likely to
 172            have "human" readability.  The id is equivalent to the ID
 173            field of the GenBank/EMBL databanks and the id field of the
 174            Swissprot/sptrembl database. In fasta format, the >(\S+) is
 175            presumed to be the id, though some people overload the id
 176            to embed other information. Bioperl does not use any
 177            embedded information in the ID field, and people are
 178            encouraged to use other mechanisms (accession field for
 179            example, or extending the sequence object) to solve this.
 180
 181            Notice that $seq->id() maps to this function, mainly for
 182            legacy/convenience reasons.
 183  Returns : A string
 184  Args    : None
 185  Status  : Virtual
 186
 187
 188 =cut
 189
 190 sub display_id {
 191    my ($self) = @_;
 192    $self->throw_not_implemented();
 193 }
 194
 195
 196 =head2 accession_number
 197
 198  Title   : accession_number
 199  Usage   : $unique_biological_key = $obj->accession_number;
 200  Function: Returns the unique biological id for a sequence, commonly
 201            called the accession_number. For sequences from established
 202            databases, the implementors should try to use the correct
 203            accession number. Notice that primary_id() provides the
 204            unique id for the implemetation, allowing multiple objects
 205            to have the same accession number in a particular implementation.
 206
 207            For sequences with no accession number, this method should return
 208            "unknown".
 209  Returns : A string
 210  Args    : None
 211  Status  : Virtual
 212
 213
 214 =cut
 215
 216 sub accession_number {
 217    my ($self,@args) = @_;
 218    $self->throw_not_implemented();
 219 }
 220
 221
 222
 223 =head2 primary_id
 224
 225  Title   : primary_id
 226  Usage   : $unique_implementation_key = $obj->primary_id;
 227  Function: Returns the unique id for this object in this
 228            implementation. This allows implementations to manage their
 229            own object ids in a way the implementaiton can control
 230            clients can expect one id to map to one object.
 231
 232            For sequences with no accession number, this method should
 233            return a stringified memory location.
 234
 235  Returns : A string
 236  Args    : None
 237  Status  : Virtual
 238
 239
 240 =cut
 241
 242 sub primary_id {
 243    my ($self,@args) = @_;
 244    $self->throw_not_implemented();
 245 }
 246
 247
 248 =head2 can_call_new
 249
 250  Title   : can_call_new
 251  Usage   : if( $obj->can_call_new ) {
 252              $newobj = $obj->new( %param );
 253          }
 254  Function: Can_call_new returns 1 or 0 depending
 255            on whether an implementation allows new
 256            constructor to be called. If a new constructor
 257            is allowed, then it should take the followed hashed
 258            constructor list.
 259
 260            $myobject->new( -seq => $sequence_as_string,
 261                            -display_id  => $id
 262                            -accession_number => $accession
 263                            -alphabet => 'dna',
 264                            );
 265  Returns : 1 or 0
 266  Args    :
 267
 268
 269 =cut
 270
 271 sub can_call_new{
 272    my ($self,@args) = @_;
 273
 274    # we default to 0 here
 275
 276    return 0;
 277 }
 278
 279 =head2 alphabet
 280
 281  Title   : alphabet
 282  Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
 283  Function: Returns the type of sequence being one of
 284            'dna', 'rna' or 'protein'. This is case sensitive.
 285
 286            This is not called "type" because this would cause
 287            upgrade problems from the 0.5 and earlier Seq objects.
 288
 289  Returns : A string either 'dna','rna','protein'. NB - the object must
 290            make a call of the alphabet, if there is no alphabet specified it
 291            has to guess.
 292  Args    : None
 293  Status  : Virtual
 294
 295
 296 =cut
 297
 298 sub alphabet{
 299     my ( $self ) = @_;
 300     $self->throw_not_implemented();
 301 }
 302
 303 =head2 moltype
 304
 305  Title   : moltype
 306  Usage   : Deprecated. Use alphabet() instead.
 307
 308 =cut
 309
 310 sub moltype{
 311    my ($self,@args) = @_;
 312
 313    $self->warn("moltype: pre v1.0 method. Calling alphabet() instead...");
 314    $self->alphabet(@args);
 315 }
 316
 317
 318 =head1 Optional Implementation Functions
 319
 320 The following functions rely on the above functions. An
 321 implementing class does not need to provide these functions, as they
 322 will be provided by this class, but is free to override these
 323 functions.
 324
 325 The revcom(), trunc(), and translate() methods create new sequence
 326 objects. They will call new() on the class of the sequence object
 327 instance passed as argument, unless can_call_new() returns FALSE. In
 328 the latter case a Bio::PrimarySeq object will be created. Implementors
 329 which really want to control how objects are created (eg, for object
 330 persistence over a database, or objects in a CORBA framework), they
 331 are encouraged to override these methods
 332
 333 =head2 revcom
 334
 335  Title   : revcom
 336  Usage   : $rev = $seq->revcom()
 337  Function: Produces a new Bio::PrimarySeqI implementing object which
 338            is the reversed complement of the sequence. For protein
 339            sequences this throws an exception of "Sequence is a
 340            protein. Cannot revcom".
 341
 342            The id is the same id as the original sequence, and the
 343            accession number is also indentical. If someone wants to
 344            track that this sequence has be reversed, it needs to
 345            define its own extensionsj.
 346
 347            To do an inplace edit of an object you can go:
 348
 349            $seq = $seq->revcom();
 350
 351            This of course, causes Perl to handle the garbage
 352            collection of the old object, but it is roughly speaking as
 353            efficient as an inplace edit.
 354
 355  Returns : A new (fresh) Bio::PrimarySeqI object
 356  Args    : None
 357
 358
 359 =cut
 360
 361 sub revcom{
 362    my ($self) = @_;
 363
 364
 365    # check the type is good first.
 366    my $t = $self->alphabet;
 367
 368    if( $t eq 'protein' ) {
 369        $self->throw("Sequence is a protein. Cannot revcom");
 370    }
 371
 372    if( $t ne 'dna' && $t ne 'rna' ) {
 373        if( $self->can('warn') ) {
 374            $self->warn("Sequence is not dna or rna, but [$t]. ".
 375                        "Attempting to revcom, but unsure if this is right");
 376        } else {
 377            warn("[$self] Sequence is not dna or rna, but [$t]. ".
 378                 "Attempting to revcom, but unsure if this is right");
 379        }
 380    }
 381
 382    # yank out the sequence string
 383
 384    my $str = $self->seq();
 385
 386    # if is RNA - map to DNA then map back
 387
 388    if( $t eq 'rna' ) {
 389        $str =~ tr/uU/tT/;
 390    }
 391
 392    # revcom etc...
 393
 394    $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
 395    my $revseq = CORE::reverse $str;
 396
 397    if( $t eq 'rna' ) {
 398        $revseq =~ tr/tT/uU/;
 399    }
 400
 401    my $seqclass;
 402    if($self->can_call_new()) {
 403        $seqclass = ref($self);
 404    } else {
 405        $seqclass = 'Bio::PrimarySeq';
 406        $self->_attempt_to_load_Seq();
 407    }
 408    my $out = $seqclass->new( '-seq' => $revseq,
 409                              '-display_id'  => $self->display_id,
 410                              '-accession_number' => $self->accession_number,
 411                              '-alphabet' => $self->alphabet,
 412                              '-desc' => $self->desc(),
 413                              '-verbose' => $self->verbose
 414                              );
 415    return $out;
 416
 417 }
 418
 419 =head2 trunc
 420
 421  Title   : trunc
 422  Usage   : $subseq = $myseq->trunc(10,100);
 423  Function: Provides a truncation of a sequence.
 424  Returns : A fresh Bio::PrimarySeqI implementing object.
 425  Args    : Two integers denoting first and last base of the sub-sequence.
 426
 427
 428 =cut
 429
 430 sub trunc{
 431    my ($self,$start,$end) = @_;
 432
 433    my $str;
 434    if( defined $start && ref($start) &&
 435        $start->isa('Bio::LocationI') ) {
 436        $str = $self->subseq($start); # start is a location actually
 437    } elsif( !$end ) {
 438        $self->throw("trunc start,end -- there was no end for $start");
 439    } elsif( $end < $start ) {
 440        my $msg = "start [$start] is greater than end [$end]. \n".
 441            "If you want to truncated and reverse complement, \n".
 442                "you must call trunc followed by revcom. Sorry.";
 443        $self->throw($msg);
 444    } else {
 445        $str = $self->subseq($start,$end);
 446    }
 447
 448    my $seqclass;
 449    if($self->can_call_new()) {
 450        $seqclass = ref($self);
 451    } else {
 452        $seqclass = 'Bio::PrimarySeq';
 453        $self->_attempt_to_load_Seq();
 454    }
 455
 456    my $out = $seqclass->new( '-seq' => $str,
 457                              '-display_id'  => $self->display_id,
 458                              '-accession_number' => $self->accession_number,
 459                              '-alphabet' => $self->alphabet,
 460                              '-desc' => $self->desc(),
 461                              '-verbose' => $self->verbose
 462                              );
 463    return $out;
 464 }
 465
 466
 467 =head2 translate
 468
 469  Title   : translate
 470  Usage   : $protein_seq_obj = $dna_seq_obj->translate
 471
 472            Or if you expect a complete coding sequence (CDS) translation,
 473            with inititator at the beginning and terminator at the end:
 474
 475            $protein_seq_obj = $cds_seq_obj->translate(-complete => 1);
 476
 477            Or if you want translate() to find the first initiation
 478            codon and return the corresponding protein:
 479
 480            $protein_seq_obj = $cds_seq_obj->translate(-orf => 1);
 481
 482  Function: Provides the translation of the DNA sequence using full
 483            IUPAC ambiguities in DNA/RNA and amino acid codes.
 484
 485            The complete CDS translation is identical to EMBL/TREMBL
 486            database translation. Note that the trailing terminator
 487            character is removed before returning the translated protein
 488            object.
 489
 490            Note: if you set $dna_seq_obj->verbose(1) you will get a
 491            warning if the first codon is not a valid initiator.
 492
 493  Returns : A Bio::PrimarySeqI implementing object
 494  Args    : -terminator    - character for terminator        default is *
 495            -unknown       - character for unknown           default is X
 496            -frame         - frame                           default is 0
 497            -codontable_id - codon table id                  default is 1
 498            -complete      - complete CDS expected           default is 0
 499            -throw         - throw exception if not complete default is 0
 500            -orf           - find 1st ORF                    default is 0
 501            -start         - alternative initiation codon
 502            -codontable    - Bio::Tools::CodonTable object
 503                    -offset        - offset for fuzzy locations      default is 0
 504
 505  Notes   : The -start argument only applies when -orf is set to 1. By default
 506            all initiation codons found in the given codon table are used
 507            but when "start" is set to some codon this codon will be used
 508            exclusively as the initiation codon. Note that the default codon
 509            table (NCBI "Standard") has 3 initiation codons!
 510
 511            By default translate() translates termination codons to
 512            the some character (default is *), both internal and trailing
 513            codons. Setting "-complete" to 1 tells translate() to remove
 514            the trailing character.
 515
 516                    -offset is used for seqfeatures which contain the the \codon_start
 517                    tag and can be set to 1, 2, or 3.  This is the offset by which the
 518                    sequence translation starts relative to the first base of the
 519                    feature
 520
 521 For details on codon tables used by translate() see L<Bio::Tools::CodonTable>.
 522
 523            Deprecated argument set (v. 1.5.1 and prior versions)
 524            where each argument is an element in an array:
 525
 526            1: character for terminator (optional), defaults to '*'.
 527            2: character for unknown amino acid (optional), defaults to 'X'.
 528            3: frame (optional), valid values are 0, 1, 2, defaults to 0.
 529            4: codon table id (optional), defaults to 1.
 530            5: complete coding sequence expected, defaults to 0 (false).
 531            6: boolean, throw exception if not complete coding sequence
 532               (true), defaults to warning (false)
 533            7: codontable, a custom Bio::Tools::CodonTable object (optional).
 534
 535 =cut
 536
 537 sub translate {
 538          my ($self,@args) = @_;
 539          my ($terminator, $unknown, $frame, $codonTableId, $complete, $throw,
 540                   $codonTable, $orf, $start_codon, $offset);
 541
 542          ## new API with named parameters, post 1.5.1
 543          if ($args[0] && $args[0] =~ /^-[A-Z]+/i) {
 544                  ($terminator, $unknown, $frame, $codonTableId, $complete, $throw,
 545                   $codonTable, $orf, $start_codon, $offset) =
 546                          $self->_rearrange([qw(TERMINATOR
 547                                                                 UNKNOWN
 548                                                                 FRAME
 549                                                                 CODONTABLE_ID
 550                                                                 COMPLETE
 551                                                                 THROW
 552                                                                 CODONTABLE
 553                                                                 ORF
 554                                                                 START
 555                                                                 OFFSET)], @args);
 556          ## old API, 1.5.1 and preceding versions
 557          } else {
 558                  ($terminator, $unknown, $frame, $codonTableId,
 559                   $complete, $throw, $codonTable, $offset) = @args;
 560          }
 561
 562     ## Initialize termination codon, unknown codon, codon table id, frame
 563     $terminator = '*'    unless (defined($terminator) and $terminator ne '');
 564     $unknown = "X"       unless (defined($unknown) and $unknown ne '');
 565     $frame = 0           unless (defined($frame) and $frame ne '');
 566     $codonTableId = 1    unless (defined($codonTableId) and $codonTableId ne '');
 567
 568     ## Get a CodonTable, error if custom CodonTable is invalid
 569     if ($codonTable) {
 570                  $self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable)
 571                         unless $codonTable->isa('Bio::Tools::CodonTable');
 572     } else {
 573                  $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId);
 574          }
 575
 576     ## Error if alphabet is "protein"
 577     $self->throw("Can't translate an amino acid sequence.") if
 578                 ($self->alphabet =~ /protein/i);
 579
 580     ## Error if -start parameter isn't a valid codon
 581          if ($start_codon) {
 582                  $self->throw("Invalid start codon: $start_codon.") if
 583                         ( $start_codon !~ /^[A-Z]{3}$/i );
 584          }
 585
 586          my $seq;
 587
 588          if ($offset) {
 589                 $self->throw("Offset must be 1, 2, or 3.") if
 590                     ( $offset !~ /^[123]$/ );
 591                 my ($start, $end) = ($offset, $self->length);
 592                 ($seq) = $self->subseq($start, $end);
 593          } else {
 594                 ($seq) = $self->seq();
 595          }
 596
 597     ## ignore frame if an ORF is supposed to be found
 598          if ($orf) {
 599                  $seq = $self->_find_orf($seq,$codonTable,$start_codon);
 600          } else {
 601          ## use frame, error if frame is not 0, 1 or 2
 602                  $self->throw("Valid values for frame are 0, 1, or 2, not $frame.")
 603                         unless ($frame == 0 or $frame == 1 or $frame == 2);
 604                  $seq = substr($seq,$frame);
 605     }
 606
 607     ## Translate it
 608     my $output = $codonTable->translate($seq);
 609     # Use user-input terminator/unknown
 610     $output =~ s/\*/$terminator/g;
 611     $output =~ s/X/$unknown/g;
 612
 613     ## Only if we are expecting to translate a complete coding region
 614     if ($complete) {
 615                  my $id = $self->display_id;
 616                  # remove the terminator character
 617                  if( substr($output,-1,1) eq $terminator ) {
 618                          chop $output;
 619                  } else {
 620                          $throw && $self->throw("Seq [$id]: Not using a valid terminator codon!");
 621                          $self->warn("Seq [$id]: Not using a valid terminator codon!");
 622                  }
 623                  # test if there are terminator characters inside the protein sequence!
 624                  if ($output =~ /\*/) {
 625                          $throw && $self->throw("Seq [$id]: Terminator codon inside CDS!");
 626                          $self->warn("Seq [$id]: Terminator codon inside CDS!");
 627                  }
 628                  # if the initiator codon is not ATG, the amino acid needs to be changed to M
 629                  if ( substr($output,0,1) ne 'M' ) {
 630                          if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) {
 631                                  $output = 'M'. substr($output,1);
 632                          }      elsif ($throw) {
 633                                  $self->throw("Seq [$id]: Not using a valid initiator codon!");
 634                          } else {
 635                                  $self->warn("Seq [$id]: Not using a valid initiator codon!");
 636                          }
 637                  }
 638     }
 639
 640     my $seqclass;
 641     if ($self->can_call_new()) {
 642                  $seqclass = ref($self);
 643     } else {
 644                  $seqclass = 'Bio::PrimarySeq';
 645                  $self->_attempt_to_load_Seq();
 646     }
 647     my $out = $seqclass->new( '-seq' => $output,
 648                                                                                 '-display_id'  => $self->display_id,
 649                                                                                 '-accession_number' => $self->accession_number,
 650                                                                                 # is there anything wrong with retaining the
 651                                                                                 # description?
 652                                                                                 '-desc' => $self->desc(),
 653                                                                                 '-alphabet' => 'protein',
 654                               '-verbose' => $self->verbose
 655                               );
 656     return $out;
 657 }
 658
 659 =head2 id
 660
 661  Title   : id
 662  Usage   : $id = $seq->id()
 663  Function: ID of the sequence. This should normally be (and actually is in
 664            the implementation provided here) just a synonym for display_id().
 665  Returns : A string.
 666  Args    :
 667
 668 =cut
 669
 670 sub  id {
 671    my ($self)= @_;
 672
 673    return $self->display_id();
 674 }
 675
 676
 677 =head2 length
 678
 679  Title   : length
 680  Usage   : $len = $seq->length()
 681  Function:
 682  Returns : Integer representing the length of the sequence.
 683  Args    :
 684
 685 =cut
 686
 687 sub  length {
 688    my ($self)= @_;
 689    $self->throw_not_implemented();
 690 }
 691
 692 =head2 desc
 693
 694  Title   : desc
 695  Usage   : $seq->desc($newval);
 696            $description = $seq->desc();
 697  Function: Get/set description text for a seq object
 698  Returns : Value of desc
 699  Args    : newvalue (optional)
 700
 701 =cut
 702
 703 sub desc {
 704    shift->throw_not_implemented();
 705 }
 706
 707
 708 =head2 is_circular
 709
 710  Title   : is_circular
 711  Usage   : if( $obj->is_circular) { /Do Something/ }
 712  Function: Returns true if the molecule is circular
 713  Returns : Boolean value
 714  Args    : none
 715
 716 =cut
 717
 718 sub is_circular{
 719     shift->throw_not_implemented;
 720 }
 721
 722 =head1 Private functions
 723
 724 These are some private functions for the PrimarySeqI interface. You do not
 725 need to implement these functions
 726
 727 =head2 _find_orf
 728
 729  Title   : _find_orf
 730  Usage   :
 731  Function: Finds ORF starting at 1st initiation codon in nucleotide sequence.
 732            The ORF is not required to have a termination codon.
 733  Example :
 734  Returns : A nucleotide sequence or nothing, if no initiation codon is found.
 735  Args    : Nucleotide sequence, CodonTable object, alternative initiation
 736            codon (optional).
 737
 738 =cut
 739
 740 sub _find_orf {
 741         my ($self,$sequence,$codonTable,$start_codon) = @_;
 742
 743         # find initiation codon and remove leading sequence
 744         while ($sequence) {
 745                 my $codon = substr($sequence,0,3);
 746                 if ($start_codon) {
 747                         last if ( $codon =~ /$start_codon/i );
 748                 } else {
 749                         last if ($codonTable->is_start_codon($codon));
 750                 }
 751                 $sequence = substr($sequence,1);
 752         }
 753         return unless $sequence;
 754
 755         # find termination codon and remove trailing sequence
 756         my $len = CORE::length($sequence);
 757         my $offset = 3;
 758         while ($offset < $len) {
 759                 my $codon = substr($sequence,$offset,3);
 760                 if ( $codonTable->is_ter_codon($codon) ){
 761                         $sequence = substr($sequence, 0, $offset + 3);
 762                         return $sequence;
 763                 }
 764                 $offset += 3;
 765         }
 766         $self->warn("No termination codon found, will translate - sequence:\n$sequence");
 767         $sequence;
 768 }
 769
 770 =head2 _attempt_to_load_Seq
 771
 772  Title   : _attempt_to_load_Seq
 773  Usage   :
 774  Function:
 775  Example :
 776  Returns :
 777  Args    :
 778
 779
 780 =cut
 781
 782 sub _attempt_to_load_Seq{
 783    my ($self) = @_;
 784
 785    if( $main::{'Bio::PrimarySeq'} ) {
 786        return 1;
 787    } else {
 788        eval {
 789            require Bio::PrimarySeq;
 790        };
 791        if( $@ ) {
 792            my $text = "Bio::PrimarySeq could not be loaded for [$self]\n".
 793                "This indicates that you are using Bio::PrimarySeqI ".
 794                "without Bio::PrimarySeq loaded or without providing a ".
 795                "complete implementation.\nThe most likely problem is that there ".
 796                "has been a misconfiguration of the bioperl environment\n".
 797                "Actual exception:\n\n";
 798            $self->throw("$text$@\n");
 799            return 0;
 800        }
 801        return 1;
 802    }
 803
 804 }
 805
 806 1;