2 # bioperl module for Bio::Structure::Entry
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Kris Boulez <kris.boulez@algonomics.com>
8 # Copyright Kris Boulez
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Structure::Entry - Bioperl structure Object, describes the whole entry
24 This object stores a whole Bio::Structure entry. It can consist of one
25 or more models (L<Bio::Structure::Model>), which in turn consist of one
26 or more chains (L<Bio::Structure::Chain>). A chain is composed of residues
27 (L<Bio::Structure::Residue>) and a residue consists of atoms
28 (L<Bio::Structure::Atom>). If no specific model or chain is chosen, the
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 the bugs and their resolution. Bug reports can be submitted via the web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 AUTHOR - Kris Boulez
62 Email kris.boulez@algonomics.com
66 The rest of the documentation details each of the object methods. Internal
67 methods are usually preceded with a _
72 # Let the code begin...
74 package Bio
::Structure
::Entry
;
77 use Bio
::Structure
::Model
;
78 use Bio
::Structure
::Chain
;
79 use Bio
::Annotation
::Collection
;
82 use base
qw(Bio::Root::Root Bio::Structure::StructureI);
87 Usage : $struc = Bio::Structure::Entry->new(
88 -id => 'structure_id',
91 Function: Returns a new Bio::Structure::Entry object from basic
92 constructors. Probably most called from Bio::Structure::IO.
93 Returns : a new Bio::Structure::Model object
98 my ($class, @args) = @_;
99 my $self = $class->SUPER::new
(@args);
101 my($id, $model, $chain, $residue ) =
102 $self->_rearrange([qw(
108 # where to store parent->child relations (1 -> 1..n)
109 # value to this hash will be an array ref
110 # by using Tie::RefHash we can store references in this hash
112 tie
%{ $self->{'p_c'} } , "Tie::RefHash";
114 # where to store child->parent relations (1 -> 1)
116 tie
%{ $self->{'c_p'} } , "Tie::RefHash";
118 $id && $self->id($id);
120 $self->{'model'} = [];
121 $model && $self->model($model);
124 if ( ! defined($self->model) ) { # no model yet, create default one
125 $self->_create_default_model;
127 for my $m ($self->model) { # add this chain on all models
132 $residue && $self->residue($residue);
134 # taken from Bio::Seq (or should we just inherit Bio::Seq and override some methods)
135 my $ann = Bio
::Annotation
::Collection
->new;
136 $self->annotation($ann);
145 Function: Connects a (or a list of) Model objects to a Bio::Structure::Entry.
146 To add a Model (and keep the existing ones) use add_model()
147 It returns a list of Model objects.
148 Returns : List of Bio::Structure::Model objects
149 Args : One Model or a reference to an array of Model objects
154 my ($self, $model) = @_;
156 if( defined $model) {
157 if( (ref($model) eq "ARRAY") ||
158 ($model->isa('Bio::Structure::Model')) ) {
159 # remove existing ones, tell they've become orphan
160 my @obj = $self->model;
163 $self->_remove_from_graph($m);
164 $self->{'model'} = [];
168 $self->add_model($self,$model);
171 $self->throw("Supplied a $model to model, we want a Bio::Structure::Model or a list of these\n");
174 # give back list of models via general get method
175 $self->get_models($self);
183 Usage : $structure->add_model($model);
184 Function: Adds a (or a list of) Model objects to a Bio::Structure::Entry.
186 Args : One Model or a reference to an array of Model objects
191 my($self,$entry,$model) = @_;
193 # if only one argument and it's a model, change evrything one place
194 # this is for people calling $entry->add_model($model);
195 if ( !defined $model && ref($entry) =~ /^Bio::Structure::Model/) {
199 # $self and $entry are the same here, but it's used for uniformicity
200 if ( !defined($entry) || ref($entry) !~ /^Bio::Structure::Entry/) {
201 $self->throw("first argument to add_model needs to be a Bio::Structure::Entry object\n");
203 if (defined $model) {
204 if (ref($model) eq "ARRAY") {
205 # if the user passed in a reference to an array
206 for my $m ( @
{$model} ) {
207 if( ! $m->isa('Bio::Structure::Model') ) {
208 $self->throw("$m is not a Model\n");
210 if ( $self->_parent($m) ) {
211 $self->throw("$m already assigned to a parent\n");
213 push @
{$self->{'model'}}, $m;
214 # create a stringified version of our ref
215 # not used untill we get symbolic ref working
216 #my $str_ref = "$self";
217 #$m->_grandparent($str_ref);
220 elsif ( $model->isa('Bio::Structure::Model') ) {
221 if ( $self->_parent($model) ) { # already assigned to a parent
222 $self->throw("$model already assigned\n");
224 push @
{$self->{'model'}}, $model;
225 # create a stringified version of our ref
226 #my $str_ref = "$self";
227 #$model->_grandparent($str_ref);
230 $self->throw("Supplied a $model to add_model, we want a Model or list of Models\n");
234 my $array_ref = $self->{'model'};
235 return $array_ref ? @
{$array_ref} : ();
242 Usage : $structure->get_models($structure);
243 Function: general get method for models attached to an Entry
244 Returns : a list of models attached to this entry
250 my ($self, $entry) = @_;
252 # self and entry can be the same
253 if ( !defined $entry) {
256 # pass through to add_model
257 $self->add_model($entry);
264 Usage : $entry->id("identity");
265 Function: Gets/sets the ID
272 my ($self, $value) = @_;
273 if (defined $value) {
274 $self->{'id'} = $value;
276 return $self->{'id'};
283 Usage : @chains = $structure->chain($chain);
284 Function: Connects a Chain or a list of Chain objects to a Bio::Structure::Entry.
285 Returns : List of Bio::Structure::Chain objects
286 Args : A Chain or a reference to an array of Chain objects
291 my ($self, $chain) = @_;
293 if ( ! $self->model ) {
294 $self->_create_default_model;
296 my @models = $self->model;
297 my $first_model = $models[0];
299 if ( defined $chain) {
301 if( (ref($chain) eq "ARRAY") || ($chain->isa('Bio::Structure::Chain')) ) {
302 # remove existing ones, tell they've become orphan
303 my @obj = $self->get_chains($first_model);
306 $self->_remove_from_graph($c);
310 $self->add_chain($first_model,$chain);
313 $self->throw("Supplied a $chain to chain, we want a Bio::Structure::Chain or a list of these\n");
316 $self->get_chains($first_model);
323 Usage : @chains = $structure->add_chain($model,$chain);
324 Function: Adds one or more Chain objects to a Bio::Structure::Entry.
325 Returns : List of Chain objects associated with the Model
326 Args : A Model object and a Chain object or a reference to an array of
332 my($self, $model, $chain) = @_;
334 if (ref($model) !~ /^Bio::Structure::Model/) {
335 $self->throw("add_chain: first argument needs to be a Model object ($model)\n");
337 if (defined $chain) {
338 if (ref($chain) eq "ARRAY") {
339 # if the user passed in a reference to an array
340 for my $c ( @
{$chain} ) {
341 if( ! $c->isa('Bio::Structure::Chain') ) {
342 $self->throw("$c is not a Chain\n");
344 if ( $self->_parent($c) ) {
345 $self->throw("$c already assigned to a parent\n");
347 $self->_parent($c, $model);
348 $self->_child($model, $c);
349 # stringify $self ref
350 #my $str_ref = "$self";
351 #$c->_grandparent($str_ref);
354 elsif ( $chain->isa('Bio::Structure::Chain') ) {
355 if ( $self->_parent($chain) ) { # already assigned to parent
356 $self->throw("$chain already assigned to a parent\n");
358 $self->_parent($chain,$model);
359 $self->_child($model, $chain);
360 # stringify $self ref
361 #my $str_ref = "$self";
362 #$chain->_grandparent($str_ref);
365 $self->throw("Supplied a $chain to add_chain, we want a Chain or list of Chains\n");
368 my $array_ref = $self->_child($model);
369 return $array_ref ? @
{$array_ref} : ();
376 Usage : $entry->get_chains($model);
377 Function: General get method for Chains attached to a Model
378 Returns : A list of Chains attached to this model
384 my ($self, $model) = @_;
386 if (! defined $model) {
387 $model = ($self->get_models)[0];
389 # pass through to add_chain
390 $self->add_chain($model);
397 Usage : @residues = $structure->residue($residue);
398 Function: Connects a (or a list of) Residue objects to a Bio::Structure::Entry.
399 Returns : List of Bio::Structure::Residue objects
400 Args : One Residue or a reference to an array of Residue objects
405 my ($self, $residue) = @_;
407 if ( ! $self->model ) {
408 my $m = $self->_create_default_model;
409 $self->add_model($self,$m);
411 my @models = $self->model;
412 my $first_model = $models[0];
414 if ( ! $self->get_chains($first_model) ) {
415 my $c = $self->_create_default_chain;
416 $self->add_chain($first_model, $c);
418 my @chains = $self->get_chains($first_model);
419 my $first_chain = $chains[0];
421 if( defined $residue) {
422 if( (ref($residue) eq "ARRAY") ||
423 ($residue->isa('Bio::Structure::Residue')) ) {
424 # remove existing ones, tell they've become orphan
425 my @obj = $self->get_residues($first_chain);
428 $self->_remove_from_graph($r);
432 $self->add_residue($first_chain,$residue);
435 $self->throw("Supplied a $residue to residue, we want a Bio::Structure::Residue or a list of these\n");
438 $self->get_residues($first_chain);
445 Usage : @residues = $structure->add_residue($chain,$residue);
446 Function: Adds one or more Residue objects to a Bio::Structure::Entry.
447 Returns : List of Bio::Structure::Residue objects
448 Args : A Chain object and a Residue object or a reference to an array of
454 my($self,$chain,$residue) = @_;
456 if (ref($chain) !~ /^Bio::Structure::Chain/) {
457 $self->throw("add_residue: first argument needs to be a Chain object\n");
459 if (defined $residue) {
460 if (ref($residue) eq "ARRAY") {
461 # if the user passed in a reference to an array
462 for my $r ( @
{$residue} ) {
463 if( ! $r->isa('Bio::Structure::Residue') ) {
464 $self->throw("$r is not a Residue\n");
466 if ( $self->_parent($r) ) {
467 $self->throw("$r already belongs to a parent\n");
469 $self->_parent($r, $chain);
470 $self->_child($chain, $r);
472 my $str_ref = "$self";
473 $r->_grandparent($str_ref);
476 elsif ( $residue->isa('Bio::Structure::Residue') ) {
477 if ( $self->_parent($residue) ) {
478 $self->throw("$residue already belongs to a parent\n");
480 $self->_parent($residue, $chain);
481 $self->_child($chain, $residue);
483 my $str_ref = "$self";
484 $residue->_grandparent($str_ref);
487 $self->throw("Supplied a $residue to add_residue, we want a Residue or list of Residues\n");
490 my $array_ref = $self->_child($chain);
491 return $array_ref ? @
{$array_ref} : ();
495 =head2 get_residues()
498 Usage : $structure->get_residues($chain);
499 Function: General get method for Residues attached to a Chain
500 Returns : A list of residues attached to this Chain
506 my ($self, $chain) = @_;
508 if ( !defined $chain) {
509 $self->throw("get_residues needs a Chain as argument");
511 # pass through to add_residue
512 $self->add_residue($chain);
519 Usage : @atoms = $structure->add_atom($residue,$atom);
520 Function: Adds a (or a list of) Atom objects to a Bio::Structure::Residue.
521 Returns : List of Bio::Structure::Atom objects
522 Args : A Residue and an Atom
527 my($self,$residue,$atom) = @_;
529 if (ref($residue) !~ /^Bio::Structure::Residue/) {
530 $self->throw("add_atom: first argument needs to be a Residue object\n");
533 if (ref($atom) eq "ARRAY") {
534 # if the user passed in a reference to an array
535 for my $a ( @
{$atom} ) {
536 if( ! $a->isa('Bio::Structure::Atom') ) {
537 $self->throw("$a is not an Atom\n");
539 if ( $self->_parent($a) ) {
540 $self->throw("$a already belongs to a parent\n");
542 $self->_parent($a, $residue);
543 $self->_child($residue, $a);
545 #my $str_ref = "$self";
546 #$r->_grandparent($str_ref);
549 #elsif ( $atom->isa('Bio::Structure::Atom') ) {
550 elsif ( ref($atom) =~ /^Bio::Structure::Atom/ ) {
551 if ( $self->_parent($atom) ) {
552 $self->throw("$atom already belongs to a parent\n");
554 $self->_parent($atom, $residue);
555 $self->_child($residue, $atom);
557 #my $str_ref = "$self";
558 #$atom->_grandparent($str_ref);
561 my $array_ref = $self->_child($residue);
562 return $array_ref ? @
{$array_ref} : ();
569 Usage : $structure->get_atoms($residue);
570 Function: General get method for Atoms attached to a Residue
571 Returns : A list of Atoms attached to this Residue
577 my ($self, $residue) = @_;
579 if ( !defined $residue) {
580 $self->throw("get_atoms needs a Residue as argument");
582 # pass through to add_atom
583 $self->add_atom($residue);
590 Usage : $structure->parent($residue);
591 Function: Returns the parent of the argument
592 Returns : The parent of the argument
593 Args : A Bio::Structure object
601 Function: Alias to conect()
609 return $self->conect(@_);
615 Usage : $structure->conect($source);
616 Function: Get/set method for conect
617 Returns : A list of serial numbers for Atoms connected to source
618 (together with $entry->get_atom_by_serial($model, $serial),
619 this should be OK for now)
620 Args : The source, the serial number for the source Atom, and the type
625 my ($self, $source, $serial, $type) = @_;
627 if ( !defined $source ) {
628 $self->throw("You need to supply at least a source to connect");
630 if ( defined $serial && defined $type ) {
631 if ( !exists(${$self->{'conect'}}{$source}) ||
632 ref(${$self->{'conect'}}{$source} !~ /^ARRAY/ ) ) {
633 ${$self->{'conect'}}{$source} = [];
635 # we also need to store type, a conect object might be better
636 my $c = $serial . "_" . $type;
637 push @
{ ${$self->{'conect'}}{$source} }, $c;
640 return () if ( !exists $self->{'conect'}{$source} ||
641 !defined $self->{'conect'}{$source} );
642 return @
{ ${$self->{'conect'}}{$source} };
645 =head2 get_all_connect_source
647 Title : get_all_connect_source
649 Function: Alias to get_all_conect_source()
655 sub get_all_connect_source
{
657 return get_all_conect_source
(@_);
660 =head2 get_all_conect_source()
662 Title : get_all_conect_source
663 Usage : @sources = $structure->get_all_conect_source;
664 Function: Get all the sources for the conect records
665 Returns : A list of serial numbers for atoms connected to source
666 (together with $entry->get_atom_by_serial($model, $serial),
667 this should be OK for now)
669 Notes : This is a bit of a kludge, but it is the best for now. Conect info might need
670 to go in a separate object
674 sub get_all_conect_source
{
678 for my $source (sort {$a<=>$b} keys %{$self->{'conect'}}) {
679 push @sources, $source;
688 Usage : $structure->master($source);
689 Function: Get/set method for master
690 Returns : The master line
691 Args : The master line for this entry
696 my ($self, $value) = @_;
697 if (defined $value) {
698 $self->{'master'} = $value;
700 return $self->{'master'};
707 Usage : $seqobj = $structure->seqres("A");
708 Function: Gets a sequence object containing the sequence from the SEQRES record.
709 if a chain-ID is given, the sequence for this chain is given, if none
710 is provided the first chain is chosen
711 Returns : A Bio::PrimarySeq
712 Args : The chain-ID of the chain you want the sequence from
717 my ($self, $chainid) = @_;
718 my $s_u = "x3 A1 x7 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3 x1 A3";
720 if ( !defined $chainid) {
721 my $m = ($self->get_models($self))[0];
722 my $c = ($self->get_chains($m))[0];
725 my $seqres = ($self->annotation->get_Annotations("seqres"))[0];
726 my $seqres_string = $seqres->as_text;
727 $self->debug("seqres : $seqres_string\n");
728 $seqres_string =~ s/^Value: //;
729 # split into lines of 62 long
730 my @l = unpack("A62" x
(length($seqres_string)/62), $seqres_string);
732 # get out chain_id and sequence
733 # we use a1, as A1 strips all spaces :(
734 my ($chid, $seq) = unpack("x3 a1 x7 A51", $line);
738 $seq =~ s/(\w+)/\u\L$1/g; # ALA -> Ala (for SeqUtils)
739 $seq =~ s/\s//g; # strip all spaces
740 $seq_ch{$chid} .= $seq;
741 $self->debug("seqres : $chid $seq_ch{$chid}\n");
743 # do we have a seqres for this chainid
744 if(! exists $seq_ch{$chainid} ) {
745 $self->warn("There is no SEQRES known for chainid \"$chainid\"");
749 # this will break for non-protein structures (about 10% for now) XXX KB
750 my $pseq = Bio
::PrimarySeq
->new(-alphabet
=> 'protein');
751 $pseq = Bio
::SeqUtils
->seq3in($pseq,$seq_ch{$chainid});
752 my $id = $self->id . "_" . $chainid;
758 =head2 get_atom_by_serial()
760 Title : get_atom_by_serial
761 Usage : $structure->get_atom_by_serial($model,$serial);
762 Function: Get the Atom by serial
763 Returns : The Atom object with this serial number in the model
764 Args : Model on which to work, serial number for atom
765 (if only a number is supplied, the first model is chosen)
769 sub get_atom_by_serial
{
770 my ($self, $model, $serial) = @_;
772 if ($model =~ /^\d+$/ && !defined $serial) { # only serial given
774 my @m = $self->get_models($self);
777 if ( !defined $model || ref($model) !~ /^Bio::Structure::Model/ ) {
778 $self->throw("Could not find (first) model\n");
780 if ( !defined $serial || ($serial !~ /^\d+$/) ) {
781 $self->throw("The serial number you provided looks fishy ($serial)\n");
783 for my $chain ($self->get_chains($model) ) {
784 for my $residue ($self->get_residues($chain) ) {
785 for my $atom ($self->get_atoms($residue) ) {
786 # this could get expensive, do we cache ???
787 next unless ($atom->serial == $serial);
795 my ($self, $obj) = @_;
797 if ( !defined $obj) {
798 $self->throw("parent: you need to supply an argument to get the parent from\n");
801 # for now we pass on to _parent, untill we get the symbolic ref thing working.
802 $self->_parent($obj);
808 %{ $self->{'p_c'} } = ();
809 %{ $self->{'c_p'} } = ();
815 Usage : $obj->annotation($seq_obj)
818 Returns : value of annotation
819 Args : newvalue (optional)
824 my ($obj,$value) = @_;
825 if( defined $value) {
826 $obj->{'annotation'} = $value;
828 return $obj->{'annotation'};
833 # from here on only private methods
836 =head2 _remove_models()
838 Title : _remove_models
840 Function: Removes the models attached to an Entry. Tells the models they
841 do not belong to this Entry any more
856 =head2 _create_default_model()
858 Title : _create_default_model
860 Function: Creates a default Model for this Entry. Typical situation
861 in an X-ray structure where there is only one model
867 sub _create_default_model
{
870 my $model = Bio
::Structure
::Model
->new(-id
=> "default");
875 =head2 _create_default_chain()
877 Title : _create_default_chain
879 Function: Creates a default Chain for this Model. Typical situation
880 in an X-ray structure where there is only one chain
886 sub _create_default_chain
{
889 my $chain = Bio
::Structure
::Chain
->new(-id
=> "default");
898 Usage : This is an internal function only. It is used to have one
899 place that keeps track of which object has which other object
900 as parent. Thus allowing the underlying modules (Atom, Residue,...)
901 to have no knowledge about all this (and thus removing the possibility
902 of reference cycles).
903 This method hides the details of manipulating references to an anonymous
905 Function: To get/set an objects parent
906 Returns : A reference to the parent if it exist, undef otherwise. In the
907 current implementation each node should have a parent (except Entry).
912 # manipulating the c_p hash
916 my ($self, $key, $value) = @_;
918 if ( (!defined $key) || (ref($key) !~ /^Bio::/) ) {
919 $self->throw("First argument to _parent needs to be a reference to a Bio:: object ($key)\n");
921 if ( (defined $value) && (ref($value) !~ /^Bio::/) ) {
922 $self->throw("Second argument to _parent needs to be a reference to a Bio:: object\n");
924 # no checking here for consistency of key and value, needs to happen in caller
926 if (defined $value) {
927 # is this value already in, shout
928 if (defined ( $self->{'c_p'}->{$key}) &&
929 exists ( $self->{'c_p'}->{$key})
931 $self->throw("_parent: $key already has a parent ${$self->{'c_p'}}{$key}\n");
933 ${$self->{'c_p'}}{$key} = $value;
935 return ${$self->{'c_p'}}{$key};
942 Usage : This is an internal function only. It is used to have one
943 place that keeps track of which object has which other object
944 as child. Thus allowing the underlying modules (Atom, Residue,...)
945 to have no knowledge about all this (and thus removing the possibility
946 to have no knowledge about all this (and thus removing the possibility
947 of reference cycles).
948 This method hides the details of manipulating references to an anonymous
950 Function: To get/set an the children of an object
951 Returns : A reference to an array of child(ren) if they exist, undef otherwise.
956 # manipulating the p_c hash
958 my ($self, $key, $value) = @_;
960 if ( (!defined $key) || (ref($key) !~ /^Bio::/) ) {
961 $self->throw("First argument to _child needs to be a reference to a Bio:: object\n");
963 if ( (defined $value) && (ref($value) !~ /^Bio::/) ) {
964 $self->throw("Second argument to _child needs to be a reference to a Bio:: object\n");
966 # no checking here for consistency of key and value, needs to happen in caller
968 if (defined $value) {
969 if ( !exists(${$self->{'p_c'}}{$key}) || ref(${$self->{'p_c'}}{$key}) !~ /^ARRAY/ ) {
970 ${$self->{'p_c'}}{$key} = [];
972 push @
{ ${$self->{'p_c'}}{$key} }, $value;
974 return ${$self->{'p_c'}}{$key};
977 =head2 _remove_from_graph()
979 Title : _remove_from_graph
980 Usage : This is an internal function only. It is used to remove from
981 the parent/child graph. We only remove the links from object to
982 his parent. Not the ones from object to its children.
983 Function: To remove an object from the parent/child graph
985 Args : The object to be orphaned
989 sub _remove_from_graph
{
990 my ($self, $object) = @_;
992 if ( !defined($object) && ref($object) !~ /^Bio::/) {
993 $self->throw("_remove_from_graph needs a Bio object as argument");
995 if ( $self->_parent($object) ) {
996 my $dad = $self->_parent($object);
997 # if we have a parent, remove me as being a child
998 for my $k (0 .. $#{$self->_child($dad)}) {
999 if ($object eq ${$self->{'p_c'}{$dad}}[$k]) {
1000 splice(@
{$self->{'p_c'}{$dad}}, $k,1);
1003 delete( $self->{'c_p'}{$object});
1008 sub _print_stats_pc
{
1009 # print stats about the parent/child hashes
1011 my $pc = scalar keys %{$self->{'p_c'}};
1012 my $cp = scalar keys %{$self->{'c_p'}};
1013 my $now_time = Time
::HiRes
::time();
1014 $self->debug("pc stats: P_C $pc C_P $cp $now_time\n");