3 # BioPerl module for Bio::Graph::ProteinGraph
5 # You may distribute this module under the same terms as perl itself
6 # POD documentation - main docs before the code
10 Bio::Graph::ProteinGraph - a representation of a protein interaction graph.
15 my $graphio = Bio::Graph::IO->new(-file => 'myfile.dat',
17 my $graph = $graphio->next_network();
19 =head2 Using ProteinGraph
21 # Remove duplicate interactions from within a dataset
22 $graph->remove_dup_edges();
24 # Get a node (represented by a sequence object) from the graph.
25 my $seqobj = $gr->nodes_by_id('P12345');
27 # Get clustering coefficient of a given node.
28 my $cc = $gr->clustering_coefficient($graph->nodes_by_id('NP_023232'));
29 if ($cc != -1) { ## result is -1 if cannot be calculated
30 print "CC for NP_023232 is $cc";
34 my $density = $gr->density();
36 # Get connected subgraphs
37 my @graphs = $gr->components();
40 $gr->remove_nodes($gr->nodes_by_id('P12345'));
42 # How many interactions are there?
43 my $count = $gr->edge_count;
45 # How many nodes are there?
46 my $ncount = $gr->node_count();
48 # Let's get interactions above a threshold confidence score.
49 my $edges = $gr->edges;
50 for my $edge (keys %$edges) {
51 if (defined($edges->{$edge}->weight()) &&
52 $edges->{$edge}->weight() > 0.6) {
53 print $edges->{$edge}->object_id(), "\t",
54 $edges->{$edge}->weight(),"\n";
58 # Get interactors of your favourite protein
59 my $node = $graph->nodes_by_id('NP_023232');
60 my @neighbors = $graph->neighbors($node);
61 print " NP_023232 interacts with ";
62 print join " ,", map{$_->object_id()} @neighbors;
65 # Annotate your sequences with interaction info
66 my @seqs; ## array of sequence objects
68 if ($graph->has_node($seq->accession_number)) {
69 my $node = $graph->nodes_by_id( $seq->accession_number);
70 my @neighbors = $graph->neighbors($node);
71 for my $n (@neighbors) {
72 my $ft = Bio::SeqFeature::Generic->new(
73 -primary_tag => 'Interactor',
74 -tags => { id => $n->accession_number }
76 $seq->add_SeqFeature($ft);
81 # Get proteins with > 10 interactors
82 my @nodes = $graph->nodes();
84 for my $node (@nodes) {
85 if ($graph->neighbor_count($node) > 10) {
89 print "the following proteins have > 10 interactors:\n";
90 print join "\n", map{$_->object_id()} @hubs;
92 # Merge graphs 1 and 2 and flag duplicate edges
94 my @duplicates = $g1->dup_edges();
95 print "these interactions exist in $g1 and $g2:\n";
96 print join "\n", map{$_->object_id} @duplicates;
98 =head2 Creating networks from your own data
100 If you have interaction data in your own format, e.g.
102 edgeid node1 node2 score
104 my $io = Bio::Root::IO->new(-file => 'mydata');
105 my $gr = Bio::Graph::ProteinGraph->new();
106 my %seen = (); # to record seen nodes
107 while (my $l = $io->_readline() ) {
109 # Parse out your data...
110 my ($e_id, $n1, $n2, $sc) = split /\s+/, $l;
112 # ...then make nodes if they don't already exist in the graph...
114 for my $n ($n1, $n2 ) {
115 if (!exists($seen{$n})) {
116 push @nodes, Bio::Seq->new(-accession_number => $n);
117 $seen{$n} = $nodes[$#nodes];
119 push @nodes, $seen{$n};
124 # ...and add a new edge to the graph
125 my $edge = Bio::Graph::Edge->new(-nodes => \@nodes,
128 $gr->add_edge($edge);
132 A ProteinGraph is a representation of a protein interaction network.
133 It derives most of its functionality from the L<Bio::Graph::SimpleGraph>
134 module, but is adapted to be able to use protein identifiers to
137 This graph can use any objects that implement L<Bio::AnnotatableI> and
138 L<Bio::IdentifiableI> interfaces. L<Bio::Seq> (but not L<Bio::PrimarySeqI>)
139 objects can therefore be used for the nodes but any object that supports
140 annotation objects and the object_id() method should work fine.
142 At present it is fairly 'lightweight' in that it represents nodes and
143 edges but does not contain all the data about experiment ids etc. found
144 in the Protein Standards Initiative schema. Hopefully that will be
147 A dataset may contain duplicate or redundant interactions.
148 Duplicate interactions are interactions that occur twice in the dataset
149 but with a different interaction ID, perhaps from a different
150 experiment. The dup_edges method will retrieve these.
152 Redundant interaction are interactions that occur twice or more in a
153 dataset with the same interaction id. These are more likely to be
154 due to database errors. These methods are useful when merging 2
155 datasets using the union() method. Interactions present in both
156 datasets, with different IDs, will be duplicate edges.
158 =head2 For Developers
160 In this module, nodes are represented by L<Bio::Seq::RichSeq> objects
161 containing all possible database identifiers but no sequence, as
162 parsed from the interaction files. However, a node represented by a
163 L<Bio::PrimarySeq> object should work fine too.
165 Edges are represented by L<Bio::Graph::Edge> objects. In order to
166 work with SimpleGraph these objects must be array references, with the
167 first 2 elements being references to the 2 nodes. More data can be
168 added in $e[2]. etc. Edges should be L<Bio::Graph::Edge> objects, which
169 are L<Bio::IdentifiableI> implementing objects.
171 At present edges only have an identifier and a weight() method, to
172 hold confidence data, but subclasses of this could hold all the
173 interaction data held in an XML document.
175 So, a graph has the following data:
177 1. A hash of nodes ('_nodes'), where keys are the text representation of a
178 nodes memory address and values are the sequence object references.
180 2. A hash of neighbors ('_neighbors'), where keys are the text representation of a
181 nodes memory address and a value is a reference to a list of
182 neighboring node references.
184 3. A hash of edges ('_edges'), where a key is a text representation of the 2 nodes.
185 E.g., "address1,address2" as a string, and values are Bio::Graph::Edge
188 4. Look up hash ('_id_map') for finding a node by any of its ids.
190 5. Look up hash for edges ('_edge_id_map') for retrieving an edge
191 object from its identifier.
193 6. Hash ('_components').
195 7. An array of duplicate edges ('_dup_edges').
197 8. Hash ('_is_connected').
201 To use this code you will need the Clone.pm module availabe from CPAN.
202 You also need Class::AutoClass, available from CPAN as well. To read in
203 XML data you will need XML::Twig available from CPAN.
207 L<Bio::Graph::SimpleGraph>
210 L<Bio::Graph::IO::dip>
211 L<Bio::Graph::IO::psi_xml>
217 User feedback is an integral part of the evolution of this and other
218 Bioperl modules. Send your comments and suggestions preferably to one
219 of the Bioperl mailing lists. Your participation is much appreciated.
221 bioperl-l@bioperl.org - General discussion
222 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
224 =head2 Reporting Bugs
226 Report bugs to the Bioperl bug tracking system to help us keep track
227 the bugs and their resolution. Bug reports can be submitted via the
230 http://bugzilla.open-bio.org/
234 Richard Adams - this module, Graph::IO modules.
236 Email richard.adams@ed.ac.uk
240 Nat Goodman - SimpleGraph.pm, and all underlying graph algorithms.
245 package Bio
::Graph
::ProteinGraph
;
246 use Bio
::Graph
::Edge
;
248 use base
qw(Bio::Graph::SimpleGraph);
253 purpose : Is a protein in the graph?
254 usage : if ($g->has_node('NP_23456')) {....}
255 returns : 1 if true, 0 if false
256 arguments : A sequence identifier.
262 my ($self, $arg) = @_;
264 $self->throw ("I need a sequence identifier!");
266 my @nodes = $self->nodes_by_id($arg);
267 if (defined($nodes[0])){return 1;}else{return 0};
274 Purpose : get node memory address from an id
275 Usage : my @neighbors= $self->neighbors($self->nodes_by_id('O232322'))
276 Returns : a SimpleGraph node representation ( a text representation
277 of a node needed for other graph methods e.g.,
279 Arguments : a protein identifier., e.g., its accession number.
286 my @nodes = $self->_ids(@_);
287 wantarray?
@nodes: $nodes[0];
295 Purpose : To merge two graphs together, flagging interactions as
297 Usage : $g1->union($g2), where g1 and g2 are 2 graph objects.
298 Returns : void, $g1 is modified
299 Arguments : A Graph object of the same class as the calling object.
300 Description : This method merges 2 graphs. The calling graph is modified,
301 the parameter graph ($g2) in usage) is unchanged. To take
302 account of differing IDs identifying the same protein, all
303 ids are compared. The following rules are used to modify $g1.
305 First of all both graphs are scanned for nodes that share
308 1. If 2 nodes(proteins) share an interaction in both graphs,
309 the edge in graph 2 is copied to graph 1 and added as a
310 duplicate edge to graph 1,
312 2. If 2 nodes interact in $g2 but not $g1, but both nodes exist
313 in $g1, the attributes of the interaction in $g2 are
314 used to make a new edge in $g1.
316 3. If 2 nodes interact in g2 but not g1, and 1 of them is a new
317 protein, that protein is put in $g1 and a new edge made to
320 4. At present, if there is an interaction in $g2 composed of a
321 pair of interactors that are not present in $g1, they are
322 not copied to $g1. This is rather conservative but prevents
323 the problem of having redundant nodes in $g1 due to the same
324 protein being identified by different ids in the same graph.
334 Graph 2: X1 P1 P2 - will be added as duplicate to Graph1
335 X2 P1 X4 - X4 added to Graph 1 and new edge made
336 X3 P2 P3 - new edge links existing proteins in G1
337 X4 Z4 Z5 - not added to Graph1. Are these different
338 proteins or synonyms for proteins in G1?
344 my ($self, $other) = @_;
345 my $class = ref($self);
346 if (!$other->isa($class)) {
347 $self->throw("I need a ". $class . " object, not a [".
348 ref($other). "] object");
351 my %detected_common_nodes;
352 my %seen_ids; # holds ids of nodes already known to be common.
354 ## for each node see if Ids are in common between the 2 graphs
355 ## just get1 common id per sequence.
357 ## Produces too many common nodes we only need 1 common id between nodes.
358 for my $id (sort keys %{$self->{'_id_map'}}) {
359 if (exists($other->{'_id_map'}{$id}) ) {
360 ## check if this node has a commonlink kown lready:
361 my $node = $self->nodes_by_id($id);
362 my $acc = $node->object_id;
363 if (!exists($detected_common_nodes{$acc})) {
364 push @common_nodes, $id; ## we store the common id
365 $detected_common_nodes{$acc} = undef; ## this means we won't store >1 common identifier
370 ## now cyle through common nodes..
371 $self->debug( "there are ". scalar @common_nodes. " common nodes\n");
373 for my $common (@common_nodes) {
374 if ($i++ % 10 ==0 ) {
377 ## get neighbours of common node for self and other
378 my @self_ns = $self->neighbors($self->nodes_by_id($common));
379 my @other_ns = $other->neighbors($other->nodes_by_id($common));
381 ## now get all ids of all neighbours
382 my %self_n_ids = $self->_get_ids(@self_ns); # get all ids of neighbors
384 ## cycle through other neighbors
385 for my $other_n(@other_ns){
386 my %other_n_ids = $self->_get_ids($other_n); # get ids of single other neighbor
388 ## case (1) in description
389 ## do any ids in other graph exist in self ?
390 # if yes, @int_match is defined, interaction does not involve a new node
391 my @int_match = grep{exists($self->{'_id_map'}{$_}) } keys %other_n_ids;
396 ## we cycle through until we have an edge defined, this deals with
397 ## multiple id matches
398 while (!$edge && $i <= $#int_match){
400 ## get edge from other graph
401 my $other_edge = $other->edge(
402 [$other->nodes_by_id($common),
403 $other->nodes_by_id($other_n->object_id)]
407 my $edge = Bio
::Graph
::Edge
->new(
408 -weight
=> $other_edge->weight(),
409 -id
=> $other_edge->object_id(),
410 -nodes
=>[$self->nodes_by_id($common),
411 $self->nodes_by_id($int_match[$i])
413 ## add it to self graph.
414 ## add_edge() works out if the edge is a new,
415 ## duplicate or a redundant edge.
416 $self->add_edge($edge);
421 ## but if other neighbour is entirely new, clone it and
424 my $other_edge = $other->edge($other->nodes_by_id($other_n->object_id()),
425 $other->nodes_by_id($common));
426 my $new = clone
($other_n);
427 $self->add_edge(Bio
::Graph
::Edge
->new(
428 -weight
=> $other_edge->weight(),
429 -id
=> $other_edge->object_id(),
430 -nodes
=>[$new, $self->nodes_by_id($common)],
434 ## add new ids to self graph look up table
435 map {$self->{'_id_map'}{$_} = $new} keys %other_n_ids;
444 Purpose : returns number of unique interactions, excluding
445 redundancies/duplicates
448 Usage : my $count = $graph->edge_count;
455 return scalar keys %{$self->_edges};
462 Purpose : returns number of nodes.
465 Usage : my $count = $graph->node_count;
472 return scalar keys %{$self->_nodes};
476 =head2 neighbor_count
478 Name : neighbor_count
479 Purpose : returns number of neighbors of a given node
480 Usage : my $count = $gr->neighbor_count($node)
481 Arguments : a node object
488 my ($self, $node) = @_;
489 if (!$node->isa('Bio::NodeI')) {
490 $self->throw ("I need a Bio::NodeI implementing object here , not a " . ref($node) . ".");
492 my @nbors = $self->neighbors($node);
493 return scalar @nbors;
496 =head2 _get_ids_by_db
498 Name : _get_ids_by_db
499 Purpose : gets all ids for a node, assuming its Bio::Seq object
500 Arguments: A Bio::SeqI object
501 Returns : A hash: Keys are db ids, values are accessions
502 Usage : my %ids = $gr->_get_ids_by_db($seqobj);
508 my $dummy_self = shift;
509 while (my $n = shift @_ ){ #ref to node, assume is a Bio::Seq
510 if (!$n->isa('Bio::AnnotatableI') || ! $n->isa('Bio::IdentifiableI' )) {
511 $n->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI implementing object, not a [" .ref($n) ."]");
514 ##if BioSeq getdbxref ids as well.
515 my $ac = $n->annotation();
516 for my $an($ac->get_Annotations('dblink')) {
517 $ids{$an->database()} = $an->primary_id();
526 my $dummy_self = shift;
527 while (my $n = shift @_ ){ #ref to node, assume is a Bio::Seq
528 if (!$n->isa('Bio::AnnotatableI') || ! $n->isa('Bio::IdentifiableI' )) {
529 $n->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI implementing object, not a [" .ref($n) ."]");
532 map {$ids{$_} = undef} ($n->object_id);
534 ##if BioSeq getdbxref ids as well.
535 if ($n->can('annotation')) {
536 my $ac = $n->annotation();
537 for my $an($ac->get_Annotations('dblink')) {
538 $ids{$an->primary_id()} = undef;
549 Purpose : adds an interaction to a graph.
550 Usage : $gr->add_edge($edge)
551 Arguments : a Bio::Graph::Edge object, or a reference to a 2 element list.
553 Description : This is the method to use to add an interaction to a graph.
554 It contains the logic used to determine if a graph is a
555 new edge, a duplicate (an existing interaction with a
556 different edge id) or a redundant edge (same interaction,
564 my $edges = $self->_edges;
565 my $neighbors = $self->_neighbors;
566 my $dup_edges = $self->_dup_edges;
569 if ( ref($_[0]) eq 'ARRAY' || !ref($_[0])) {
570 $self->SUPER::add_edges
(@_);
573 elsif ( $_[0]->isa('Bio::Graph::Edge') ) { # it's already an edge
577 $self->throw(" Invalid edge! - must be an array of nodes, or an edge object");
580 my ($m, $n) = $edge->nodes();
581 next if $m eq $n; # no self edges
582 last unless defined $m && defined $n;
583 ($m,$n) = ($n,$m) if "$n" lt "$m";
585 if (!exists($edges->{$m,$n})) {
586 $self->add_node($m,$n);
587 ($m,$n) = $self->nodes($m,$n);
588 $edges->{$m,$n} = $edge;
589 push(@
{$neighbors->{$m}},$n);
590 push(@
{$neighbors->{$n}},$m);
592 ## create look up hash for edge ##
593 $self->{'_edge_id_map'}{$edge->object_id()} = $edge;
595 ## is it a redundant edge, ie with same edge id?
596 my $curr_edge = $edges->{$m,$n};
597 if($curr_edge->object_id() eq $edge->object_id()) {
598 $self->redundant_edge($edge);
600 ## else it is a duplicate i.e., same nodes but different edge id
602 $self->add_dup_edge($edge);
606 $self->_is_connected(undef); # clear cached value
613 Purpose : To construct a subgraph of nodes from the main network.This
614 method overrides that of Bio::Graph::SimpleGraph in its dealings with
616 Usage : my $sg = $gr->subgraph(@nodes).
617 Returns : A subgraph of the same class as the original graph. Edge objects are
618 cloned from the original graph but node objects are shared, so beware if you
619 start deleting nodes from the parent graph whilst operating on subgraph nodes.
620 Arguments : A list of node objects.
627 ## make new graph of same type as parent
628 my $class = ref($self);
629 my $subgraph = new
$class;
630 $subgraph->add_node(@_);
631 # add all edges amongst the nodes
632 my @nodes=$subgraph->nodes;
635 if ($i++ % 100 == 0) { print STDERR
".";}
637 my $edges = $self->_edges;
639 if ($self->has_edge([$m,$n])) {
640 my ($edge) = $self->edges([$m,$n]); ## returns list of edges
641 my $id = $edge->object_id;
642 $subgraph->add_edge(Bio
::Graph
::Edge
->new(-nodes
=>[$m,$n],
653 Purpose : to flag an interaction as a duplicate, take advantage of
654 edge ids. The idea is that interactions from 2 sources with
655 different interaction ids can be used to provide more
656 evidence for a interaction being true, while preventing
657 redundancy of the same interaction being present more than
658 once in the same dataset.
659 Returns : 1 on successful addition, 0 on there being an existing
661 Usage : $gr->add_dup_edge(edge->new (-nodes => [$n1, $n2],
664 Arguments : an EdgeI implementing object.
673 my ($self, $newedge) = @_;
675 my $newedge_id = $newedge->object_id();
677 ## now we have node objects, an edge id.
679 my $dup_edges = $self->_dup_edges();
680 if(!grep{$_->object_id eq $newedge_id } @
$dup_edges) {
681 push @
$dup_edges, $newedge;
684 $self->redundant_edge($newedge);
691 Purpose : retrieve data about an edge from its id
692 Arguments : a text identifier
693 Returns : a Bio::Graph::Edge object or undef
694 Usage : my $edge = $gr->edge_by_id('1000E');
700 my ($self, $id) = @_;
702 $self->warn ("Need a text identifier");
706 $self->throw(" I need a text identifier, not a [" . ref($id) . "].");
708 if (defined($self->{'_edge_id_map'}{$id})) {
709 return $self->{'_edge_id_map'}{$id};
715 =head2 remove_dup_edges
717 Name : remove_dup_edges
718 Purpose : removes duplicate edges from graph
719 Arguments : none - removes all duplicate edges
720 edge id list - removes specified edges
722 Usage : $gr->remove_dup_edges()
723 or $gr->remove_dup_edges($edgeid1, $edgeid2);
727 sub remove_dup_edges
{
728 my ($self, @args) = @_;
729 my $dups = $self->_dup_edges();
734 while (my $node = shift @args) {
736 for my $dup (@
$dups) {
737 if (!grep{$node eq $_} $dup->nodes) {
738 push @new_dups, $dup;
748 =head2 redundant_edge
750 Name : redundant_edge
751 Purpose : adds/retrieves redundant edges to graph
752 Usage : $gr->redundant_edge($edge)
753 Arguments : none (getter) or a Biuo::Graph::Edge object (setter).
754 Description : redundant edges are edges in a graph that have the
755 same edge id, ie. are 2 identical interactions.
756 With edge arg adds it to list, else returns list as reference.
762 my ($self, $edge) =@_;
764 if (!$edge->isa('Bio::Graph::Edge')) {
765 $self->throw ("I need a Bio::Graph::Edge object , not a [". ref($edge). "] object.");
767 if (!exists($self->{'_redundant_edges'})) {
768 $self->{'_redundant_edges'} = [];
770 ## add edge to list if not already listed
771 if (!grep{$_->object_id eq $edge->object_id} @
{$self->{'_redundant_edges'}}){
772 push @
{$self->{'_redundant_edges'}}, $edge;
776 if (exists ($self->{'_redundant_edges'})){
777 return @
{$self->{'_redundant_edges'}};
785 =head2 redundant_edges
787 Name : redundant_edges
788 Purpose : alias for redundant_edge
792 sub redundant_edges
{
794 return $self->redundant_edge(shift);
797 =head2 remove_redundant_edges
799 Name : remove_redundant_edges
800 Purpose : removes redundant_edges from graph, used by remove_node(),
801 may be better as an internal method??
802 Arguments : none - removes all redundant edges
803 edge id list - removes specified edges
805 Usage : $gr->remove_redundant_edges()
806 or $gr->remove_redundant_edges($edgeid1, $edgeid2);
810 sub remove_redundant_edges
{
811 my ($self, @args) = @_;
812 my @dups = $self->redundant_edge();
813 ## if no args remove all
815 $self->{'_redundant_edges'} = [];
818 while (my $node = shift @args) {
820 for my $dup (@dups) {
821 if (!grep{$node eq $_} $dup->nodes) {
822 push @new_dups, $dup;
825 $self->{'_redundant_edges'} = \
@new_dups;
832 =head2 clustering_coefficient
834 Name : clustering_coefficient
835 Purpose : determines the clustering coefficient of a node, a number
836 in range 0-1 indicating the extent to which the neighbors of
837 a node are interconnnected.
838 Arguments : A sequence object (preferred) or a text identifier
839 Returns : The clustering coefficient. 0 is a valid result.
840 If the CC is not calculable ( if the node has <2 neighbors),
842 Usage : my $node = $gr->nodes_by_id('P12345');
843 my $cc = $gr->clustering_coefficient($node);
847 sub clustering_coefficient
{
848 my ($self, $val) = @_;
849 my $n = $self->_check_args($val);
850 $self->throw("[$val] is an incorrect parameter, not presnt in the graph")
852 my @n = $self->neighbors($n);
853 my $n_count = scalar @n;
856 ## calculate cc if we can
858 for (my $i = 0; $i <= $#n; $i++ ) {
859 for (my $j = $i+1; $j <= $#n; $j++) {
860 if ($self->has_edge($n[$i], $n[$j])){
865 $c = 2 * $c / ($n_count *($n_count - 1));
866 return $c; # can be 0 if unconnected.
868 return -1; # if value is not calculable
875 Purpose : to delete a node from a graph, e.g., to simulate effect
877 Usage : $gr->remove_nodes($seqobj);
878 Arguments : a single $seqobj or list of seq objects (nodes)
879 Returns : 1 on success
887 $self->warn("You have to specify a node");
890 my $edges = $self->_edges;
891 my $ns = $self->_neighbors;
892 my $dups = $self->_dup_edges;
893 my $nodes = $self->_nodes;
894 while (my $val = shift @_ ) {
897 my $node = $self->_check_args($val);
898 $self->throw("[$val] is an incorrect parameter, not present in the graph")
899 unless defined($node);
901 ##1. remove dup edges and redundant edges containing the node ##
902 $self->remove_dup_edges($node);
903 $self->remove_redundant_edges($node);
905 ##2. remove node from interactor's neighbours
906 my @ns = $self->neighbors($node);
908 my @otherns = $self->neighbors($n); #get neighbors of neighbors
910 ##look for node in neighbor's neighbors
911 @new_others = grep{$node ne $_} @otherns;
912 $ns->{$n} = \
@new_others;
915 ##3. Delete node from neighbour hash
918 ##4. Now remove edges involving node
919 for my $k (keys %$edges) {
920 ##access via internal hash rather than by object.
921 if ($edges->{$k}->[0] eq $node ||
922 $edges->{$k}->[1] eq $node){
923 ## delete edge from look up hash
924 my $edge_id = $edges->{$k}->object_id();
925 delete $self->{'_edge_id_map'}{$edge_id};
926 delete($edges->{$k});
930 ##5. Now remove node itself;
931 delete $nodes->{$node}{'_node_id'};
932 delete $nodes->{$node};
934 ##6. now remove aliases from look up hash so it can no longer be accessed.
935 ## is this wise? or shall we keep the sequence object available??
940 =head2 unconnected_nodes
942 Name : unconnected_nodes
943 Purpose : return a list of nodes with no connections.
945 Returns : an array or array reference of unconnected nodes
946 Usage : my @ucnodes = $gr->unconnected_nodes();
950 sub unconnected_nodes
{
952 my $neighbours = $self->_neighbors;
953 my $nodes = $self->_nodes;
955 for my $n (keys %$neighbours) {
956 if (@
{$neighbours->{$n}} == 0){
957 push @
$uc_nodes, $nodes->{$n};
960 wantarray?@
$uc_nodes:$uc_nodes;
963 =head2 articulation_points
965 Name : articulation_points
966 Purpose : to find edges in a graph that if broken will fragment
967 the graph into islands.
968 Usage : my $edgeref = $gr->articulation_points();
969 for my $e (keys %$edgeref) {
970 print $e->[0]->accession_number. "-".
971 $e->[1]->accession_number ."\n";
974 Returns : a list references to nodes that will fragment the graph
976 Notes : This is a "slow but sure" method that works with graphs
977 up to a few hundred nodes reasonably fast.
981 sub articulation_points
{
984 ## see if results are cahced already
985 $self->{'_artic_points'} ||= '';
986 return $self->{'_artic_points'} if $self->{'_artic_points'};
989 $self->debug( "doing subgraphs\n");
990 my @subgraphs = $self->components();
994 for my $sg (@subgraphs) {
995 my $all_nodes = $sg->_nodes;
996 $self->debug( "in subgraph - size". scalar (keys %$all_nodes) . "\n");
997 ##ignore isolated vertices
998 next if scalar keys %$all_nodes <= 2;
999 my $neighbors = $sg->_neighbors;
1001 ## find most connected - will be artic point if has >2 neighbors.
1002 ## use this to initiate DFS
1005 for my $n (keys %$neighbors) {
1006 my $c = scalar @
{$neighbors->{$n}};#
1007 ($max, $id) = ($c, $n) if $c > $max;#
1010 my $t = $sg->node_traversal($all_nodes->{$id},'d');
1011 my @nodes = $t->get_all();
1015 $n->{'_node_id'} = $id;
1019 ## cycle through each node
1020 for (my $i = $#nodes; $i >= 0; $i--) {
1022 ## initiate minimumn to node_id
1023 my $curr_min = $all_nodes->{$nodes[$i]}{'_node_id'};
1024 #print STDERR "currmin - $curr_min, i = $i\n";
1025 ## cycle through neighbors, reset minumum if required
1026 my $nbors = $neighbors->{$nodes[$i]};
1027 for my $nbor (@
$nbors) {
1028 my $nbor_id = $all_nodes->{$nbor}{'_node_id'};
1030 ## if is back edge ##
1031 if ($nbor_id < $i) {
1032 $curr_min = $nbor_id if $nbor_id < $curr_min ;
1035 ## else is tree edge
1036 elsif($nbor_id > $i) {
1037 my $wlow = $all_nodes->{$nbor}{'_wlow'};
1038 $curr_min = $wlow if $wlow < $curr_min;
1042 ## now we know the minimum, save.
1043 $all_nodes->{$nodes[$i]}{'_wlow'} = $curr_min;
1045 ## now get tree nodes and test condition
1046 my @treenodes = grep{$all_nodes->{$_}{'_node_id'} > $i}@
$nbors;
1047 for my $tn (@treenodes) {
1048 if(($all_nodes->{$tn}{'_wlow'} >= $i && $i != 0) ||
1049 ($i == 0 && scalar @
{$neighbors->{$nodes[0]}} > 1) ){
1050 $rts{$nodes[$i]} = $nodes[$i] unless exists $rts{$nodes[$i]};
1056 ## cache results and return
1057 $self->{'_artic_points'} = [values %rts]; ##
1058 return $self->{'_artic_points'};
1061 =head2 is_articulation_point
1063 Name : is_articulation_point
1064 Purpose : to determine if a given node is an articulation point or not.
1065 Usage : if ($gr->is_articulation_point($node)) {....
1066 Arguments : a text identifier for the protein or the node itself
1067 Returns : 1 if node is an articulation point, 0 if it is not
1071 sub is_articulation_point
{
1072 my ($self, $val) = @_;
1073 my $node = $self->_check_args($val);
1075 ## this uses a cached value so it does not have to recalculate each time..
1076 my $artic_pt_ref = $self->articulation_points();
1077 my $acc = $node->accession_number;
1078 if (grep{$_->accession_number eq $acc} @
$artic_pt_ref ){
1089 while (my $id = shift@_) {
1090 push @refs, $self->{'_id_map'}{$id};
1096 ## used to check a parameter is a valid node or a text identifier
1097 my ($self, $val) = @_;
1100 $self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI");
1103 ## if param is text try to get sequence object..
1105 $n = $self->nodes_by_id($val);
1107 $self->throw ("Cannnot find node given by the id [$val]");
1110 # if reference should be a NodeI implementing object.
1111 elsif (!$val->isa('Bio::AnnotatableI') || !$val->isa('Bio::IdentifiableI')) {
1112 $self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI ,not a [". ref($val) . "].");
1117 return $n; #n is either a node or undef