More tidying, remove , and so on
[bioperl-network.git] / lib / Bio / Network / ProteinNet.pm
blob31ec26d871c19a0ffe36f0217893462cc1bf186e
2 # BioPerl module for Bio::Network::ProteinNet
4 # You may distribute this module under the same terms as perl itself
5 # POD documentation - main docs before the code
7 =head1 NAME
9 Bio::Network::ProteinNet - a representation of a protein interaction graph.
11 =head1 SYNOPSIS
13 # Read in from file
14 my $graphio = Bio::Network::IO->new(-file => 'human.xml',
15 -format => 'psi25');
16 my $graph = $graphio->next_network();
18 my @edges = $gr->edges;
20 for my $edge (@edges) {
21 for my $node ($edge->[0],$edge->[1]) {
22 my @proteins = $node->proteins;
23 for my $protein (@proteins) {
24 print $protein->display_id," ";
29 =head1 Perl Graph module
31 The bioperl-network package uses the Perl Graph module, use version .86 or greater.
33 =head2 Working with Nodes
35 A Node object represents either a protein or a protein complex. Nodes can
36 be retrieved through their identifiers:
38 # Get a node (represented by a sequence object) from the graph.
39 my $node = $graph->get_nodes_by_id('UniProt:P12345');
41 # A node that's a protein can be treated just like a Sequence object
42 print $node->seq;
44 # Remove a node by specifying its identifier
45 $graph->remove_nodes($graph->get_nodes_by_id('UniProt:P12345'));
47 # How many nodes are there?
48 my $ncount = $graph->nodes();
50 # Get interactors of your favourite protein
51 my $node = $graph->get_nodes_by_id('RefSeq:NP_023232');
52 my @neighbors = $graph->neighbors($node);
53 print " NP_023232 interacts with ";
54 print join " ,", map{$_->primary_id()} @neighbors;
55 print "\n";
57 # Annotate your sequences with interaction info
58 my @seq_objects = ($seq1, $seq2, $seq3);
59 for my $seq (@seq_objects) {
60 if ( $graph->get_nodes_by_id($seq->accession_number) ) {
61 my $node = $graph->get_nodes_by_id( $seq->accession_number);
62 my @neighbors = $graph->neighbors($node);
63 for my $n (@neighbors) {
64 my $ft = Bio::SeqFeature::Generic->new(
65 -primary_tag => 'Interactor',
66 -tag => { id => $n->accession_number }
68 $seq->add_SeqFeature($ft);
73 # Get proteins with > 10 interactors
74 my @nodes = $graph->nodes();
75 my @hubs;
76 for my $node (@nodes) {
77 if ($graph->neighbors($node) > 10) {
78 push @hubs, $node;
81 print "the following proteins have > 10 interactors:\n";
82 print join "\n", map {$_->primary_id()} @hubs;
84 # Get clustering coefficient of a given node.
85 my $id = "RefSeq:NP_023232";
86 my $cc = $graph->clustering_coefficient($graph->get_nodes_by_id($id));
87 if ($cc != -1) { ## result is -1 if cannot be calculated
88 print "CC for $id is $cc";
91 =head2 Working with Edges
93 # How many edges are there?
94 my $ecount = $graph->edges;
96 # Get all the paired nodes, or edges, in the graph as an array
97 my @edges = $graph->edges
99 =head2 Working with Interactions
101 # How many interactions are there?
102 my $icount = $graph->interactions;
104 # Retrieve all interactions
105 my @interx = $graph->interactions;
107 # Get interactions above a threshold confidence score
108 for my $interx (@interx) {
109 if ($interx->weight > 0.6) {
110 print $interx->primary_id, "\t", $interx->weight, "\n";
114 =head2 Working with Graphs
116 # Get graph density
117 my $density = $graph->density();
119 # Get connected sub-graphs
120 my @graphs = $graph->connected_components();
122 # Copy interactions from one graph to another
123 $graph1->add_interactions_from($graph2);
126 =head2 Creating networks from your own data
128 If you have interaction data in your own format, e.g.
130 <interaction id> <protein id 1> <protein id 2> <score>
132 A simple approach would look something like this:
134 my $io = Bio::Root::IO->new(-file => 'mydata');
135 my $graph = Bio::Network::ProteinNet->new(refvertexed => 1);
137 while (my $l = $io->_readline() ) {
138 my ($id, $nid1, $nid2, $sc) = split /\s+/, $l;
140 my $prot1 = Bio::Seq->new(-accession_number => $nid1);
141 my $prot2 = Bio::Seq->new(-accession_number => $nid2);
143 # create new Interaction object based on an id and weight
144 my $interaction = Bio::Network::Interaction->new(-id => $id,
145 -weight => $sc );
146 $graph->add_interaction(-nodes => [($prot1,$prot2)]),
147 -interaction => $interaction );
151 =head1 DESCRIPTION
153 A ProteinNet is a representation of a protein interaction network.
154 Its functionality comes from the L<Graph> module of Perl and from BioPerl,
155 the nodes or vertices in the network are Sequence objects.
157 =head2 Nodes
159 A node is one or more BioPerl sequence objects, a L<Bio::Seq> or
160 L<Bio::Seq::RichSeq> object. Essentially the graph can use any objects
161 that implement L<Bio::AnnotatableI> and L<Bio::IdentifiableI> interfaces
162 since these objects hold useful identifiers. This is relevant since the
163 identity of nodes is determined by their identifiers.
165 =head2 Interactions and Edges
167 Since bioperl-network is built on top of the L<Graph> and L<Graph::Undirected>
168 modules of Perl it uses its formal model as well. An Edge corresponds to a
169 pair of nodes, and there is only one Edge per pair. An Interaction is an
170 attribute of an Edge, and there can be 1 or more Interactions per Edge. So
172 $ecount = $network->edges
174 Tells you how many paired nodes there are and
176 $icount = $network->interactions
178 Tells you how many node-node interactions there are. An Interaction is
179 equivalent to one experiment or one experimental observation.
181 =head1 FOR DEVELOPERS
183 In this module, the nodes or vertexes are represented by L<Bio::Seq>
184 objects containing database identifiers but usually
185 without sequence, since the data is parsed from protein-protein
186 interaction data.
188 Interactions should be L<Bio::Network::Interaction> objects, which are
189 L<Bio::IdentifiableI> implementing objects. At present Interactions only
190 have an identifier and a weight() method, to hold confidence data.
192 A ProteinNet object has the following internal data, aside from the data
193 structures of Graph itself:
195 =over 2
197 =item _id_map
199 Look-up hash ('_id_map') for finding a node using any of its ids. The keys
200 are standard identifiers (e.g. "GenBank:A12345") and the values are
201 memory addresses used by Graph (e.g. "Bio::Network::Node=HASH(0x1bc53e4)").
203 =item _interx_id_map
205 Look-up hash for Interactions ('_interx_id_map'),used for retrieving an
206 Interaction object using an identifier. The keys are primary ids of the
207 Interaction (e.g. "DIP:2341E") and the values are addresses of
208 Interactions (e.g. "Bio::Network::Interaction=HASH(0x1bc46f2)").
210 =back
212 The function of these hashes is either to facilitate fast lookups or
213 to cache data.
215 =head1 API CHANGES
217 These modules were first released as part of the core BioPerl package
218 and were called Bio::Graph. Bio::Graph was copied to a separate package,
219 bioperl-network, and renamed Bio::Network. All of the modules were
220 revised and a new module, Interaction.pm, was added. The
221 functionality of the PSI MI parser, IO/psi.pm, was significantly
222 enhanced.
224 Graph manipulation in Bio::Graph was based on the Bio::Graph::SimpleGraph
225 module by Nat Goodman. The first release as a separate package,
226 bioperl-network, replaced SimpleGraph with the Perl Graph package. Other
227 API changes were also made, partly to keep nomenclature consistent with
228 BioPerl, partly to use the terms used by the interaction databases, and
229 partly to accomodate the differences between Graph and
230 Bio::Graph::SimpleGraph.
232 The advantages to using Graph are that Bioperl developers are not
233 responsible for maintaining the code that actually handles graph
234 manipulation and there is more functionality in Graph than in SimpleGraph.
236 =over 13
238 =item Bio::Graph::Edge
240 Bio::Graph::Edge has been replaced by Bio::Network::Interaction
241 and Bio::Network::Edge
243 =item next_graph()
245 This method has been replaced by next_network().
247 =item union()
249 The union() method has been removed since it was not performing a true
250 union. It has been replaced by L<add_interaction_from>
252 =item remove_nodes()
254 remove_nodes() is now an alias to Graph::delete_vertices
256 =item _get_ids_by_db()
258 _get_ids_by_db() has been renamed L<get_ids_by_node>
260 =item add_node()
262 add_node() is now an alias to Graph::add_vertex
264 =item components()
266 components() is now an alias to Graph::connected_components
268 =item edge_count()
270 edge_count() is now an alias to Graph::edges
272 =item node_count()
274 node_count() is now an alias to Graph::vertices
276 =item nodes_by_id()
278 nodes_by_id() is now an alias to L<get_nodes_by_id>
280 =item edge_by_id()
282 This method has been removed since edges no longer have identifiers,
283 Interactions do. Use L<get_interaction_by_id>
285 =item unconnected_nodes()
287 unconnected_nodes() is now an alias to Graph::isolated_vertices
289 =item object_id()
291 object_id() is now an alias to Interaction::primary_id()
293 =back
295 =head1 REQUIREMENTS
297 To use this module you need Graph.pm, version .80 or greater. To
298 read XML data (e.g. PSI XML) you will need XML::Twig.
300 =head1 SEE ALSO
302 L<Bio::Network::IO>
303 L<Bio::Network::Edge>
304 L<Bio::Network::Node>
305 L<Bio::Network::Interaction>
306 L<Bio::Network::IO::dip>
307 L<Bio::Network::IO::psi>
309 =head1 FEEDBACK
311 =head2 Mailing Lists
313 User feedback is an integral part of the evolution of this and other
314 Bioperl modules. Send your comments and suggestions preferably to one
315 of the Bioperl mailing lists. Your participation is much appreciated.
317 bioperl-l@bioperl.org - General discussion
318 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
320 =head2 Support
322 Please direct usage questions or support issues to the mailing list:
324 I<bioperl-l@bioperl.org>
326 rather than to the module maintainer directly. Many experienced and
327 reponsive experts will be able look at the problem and quickly
328 address it. Please include a thorough description of the problem
329 with code and data examples if at all possible.
331 =head2 Reporting Bugs
333 Report bugs to the Bioperl bug tracking system to help us keep track
334 the bugs and their resolution. Bug reports can be submitted via the
335 web:
337 http://bugzilla.open-bio.org/
339 =head1 AUTHORS
341 Brian Osborne bosborne at alum.mit.edu
342 Richard Adams richard.adams@ed.ac.uk
344 Maintained by Brian Osborne
346 The first version of this package was based on the Bio::Graph::SimpleGraph
347 module written by Nat Goodman.
349 =cut
351 package Bio::Network::ProteinNet;
352 use strict;
353 use Graph 0.86;
354 use Bio::Network::Interaction;
355 use Bio::Root::Root;
356 use vars qw($GRAPH_ARRAY_INDEX @ISA);
357 @ISA = qw( Graph::Undirected Bio::Root::Root );
359 # A Graph object is an array reference, therefore we need
360 # to add all our additional data at a specific, arbitrary index
361 $GRAPH_ARRAY_INDEX = 5;
364 =head2 get_interaction_by_id
366 Name : get_interaction_by_id
367 Purpose : Get an interaction using an id
368 Usage : $interx = $g->get_interaction_by_id($id)
369 Returns : One or more Interactions
370 Arguments : One or more Interaction identifiers, the primary id
372 =cut
374 sub get_interaction_by_id {
375 my ($self,@ids) = @_;
376 $self->throw("I need an identifier!") unless (@ids);
377 my @interx;
378 for my $id (@ids) {
379 my @temp;
380 push @temp, $self->[$GRAPH_ARRAY_INDEX]->{'_interx_id_map'}->{$id};
381 $self->warn("More than 1 Interaction retrieved using id $id") if ($#temp > 0);
382 push @interx,@temp;
384 scalar @interx == 1 ? return $interx[0] : return @interx;
387 =head2 get_nodes_by_id
389 Name : get_nodes_by_id
390 Purpose : Get node using an id
391 Usage : $node = $g->get_nodes_by_id($id)
392 Returns : One node
393 Arguments : One or more protein identifiers
395 =cut
397 sub get_nodes_by_id {
398 my $self = shift;
399 my @ids = @_;
400 my @nodes = $self->_ids(@ids);
401 unless (@nodes) {
402 my $str = join " ",@ids;
403 $self->warn("No nodes retrieved using these ids: $str");
404 return 0;
406 if ($#nodes > 0) {
407 my $str = join " ",@ids;
408 #$self->warn("Returning >1 node retrieved using these ids: $str");
409 return @nodes;
411 $nodes[0];
414 =head2 get_interactions
416 Name : get_interactions
417 Purpose : Get 1 or more Interaction objects given a pair of nodes
418 Usage : @interx = $g->get_interactions($n1,$n2)
419 Returns : A hash of Interaction objects where the key is the primary
420 id of the Interaction and the value is the Interaction
421 Arguments : 2 nodes
422 Notes :
424 =cut
426 sub get_interactions {
427 my ($self,@nodes) = @_;
429 $self->throw("The get_interactions method needs 2 nodes, not ".
430 scalar @nodes . " nodes") if ($#nodes != 1);
432 for my $node (@nodes) {
433 $self->throw("Node must be a Bio::Network::Node object, not a [". ref($node) . "].")
434 unless ($node->isa("Bio::Network::Node"));
437 my $interactions = $self->get_edge_attributes(@nodes);
438 %$interactions;
441 =head2 add_id_to_interaction
443 Name : add_id_to_interaction
444 Purpose : Store identifiers in an internal hash that is used to look
445 up interactions by id - this does not add ids to Interaction
446 objects.
447 Usage : $g->add_id_to_interaction($id,$interaction)
448 Arguments : Identifier and Interaction object.
449 Returns :
450 Notes : The identifier should be concatenated
451 with a database or namespace name in order to make
452 accurate comparisons when you are merging data from different
453 formats. Examples: DIP:3455E.
454 Use _get_standard_name() to find a standardized name.
456 See L<_get_standard_name>
458 =cut
460 sub add_id_to_interaction {
461 my ($g,$id,$interx) = @_;
463 $g->throw("Node must be a Bio::Network::Interaction object, not a ["
464 . ref($interx) . "]." ) unless ($interx->isa("Bio::Network::Interaction"));
466 $g->[$GRAPH_ARRAY_INDEX]->{'_interx_id_map'}->{$id} = $interx if $id;
469 =head2 add_id_to_node
471 Name : add_id_to_node
472 Purpose : Store identifiers in an internal hash that is used to look
473 up nodes by id - this does not add ids to Node objects
474 or their associated Annotation objects.
475 Usage : $g->add_id_to_node($id,$node) or
476 $g->add_id_to_node(\@ids,$node)
477 Arguments : Identifier (or reference to an array of identifiers), node.
478 Returns :
479 Notes : The identifier should be concatenated
480 with a database or namespace name in order to make
481 accurate comparisons when you are merging data from different
482 formats. Examples: DIP:3455N, UniProt:Q45772, GenBank:7733911.
483 Use _get_standard_name() to find a standardized name.
485 See L<_get_standard_name>
487 =cut
489 sub add_id_to_node {
490 my ($g,$id,$node) = @_;
492 $g->throw("Node must be a Bio::Network::Node object, not a ["
493 . ref($node) . "]." ) unless ($node->isa("Bio::Network::Node"));
494 $g->throw("Node $node does not exist, cannot add edge")
495 unless ($g->has_node($node));
497 if (ref $id eq "ARRAY") {
498 my @ids = @$id;
499 for my $id (@ids) {
500 next unless $id;
501 $g->[$GRAPH_ARRAY_INDEX]->{'_id_map'}->{$id} = $node;
503 } else {
504 $g->[$GRAPH_ARRAY_INDEX]->{'_id_map'}->{$id} = $node if $id;
508 =head2 add_interactions_from
510 Name : add_interactions_from
511 Purpose : To copy interactions from one graph to another
512 Usage : $graph1->add_interactions_from($graph2)
513 Returns : void
514 Arguments : A Graph object of the same class as the calling object.
515 Description : This method copies interactions from the graph passed as the
516 argument to the calling graph. To take account of
517 differing IDs identifying the same protein, all ids are
518 compared. The following rules are used:
520 1. If a pair of nodes exist in both graphs then:
521 a. No Interactions with the same primary id will be copied
522 from $graph2 to $graph1.
523 b. All other Interactions from $graph2 will be copied
524 to $graph1, even if these nodes do not interact in $graph1.
526 2. Nodes are never copied from $graph2 to $graph1. This is rather
527 conservative but prevents the problem of having duplicated,
528 identical nodes in $graph1 due to the same protein being identified
529 by different ids in the 2 graphs.
531 So, for example
533 Interaction N1 N2 Comment
535 Graph 1: E1 P1 P2
536 E2 P3 P4
537 E3 P1 P4
539 Graph 2: E1 P1 P2 E1 will not be copied to Graph1
540 X2 P1 P3 X2 will be copied to Graph 1
541 X3 P1 P4 X3 will be copied to Graph 1
542 X4 Z4 Z5 Nothing copied to Graph1
544 There are measures one could take to allow copying nodes from $graph2
545 to $graph1, currently unimplemented:
547 1. Use sequence, if available, and some threshold measure of similarity,
548 or length, to prove that proteins are not identical and can be copied.
550 2. Use species information. For example, if $graph1 is entirely composed
551 of human proteins then any non-human proteins could be copied to
552 $graph1 without risk (and cross-species interactions are fairly common
553 due the nature of interaction experiments).
555 3. Use namespace or dataspace when assessing identity. For example, assume
556 that all nodes in $graph1 are identified by Swissprot ids. Assume a
557 protein in $graph2 is also identified by a Swissprot id, not found in
558 $graph1. This could be reasonable grounds for allowing the protein in
559 $graph2 to be copied to $graph1.
561 4. Some combination of the above.
563 =cut
565 sub add_interactions_from {
566 my ($graph1, $graph2) = @_;
567 my $class = ref($graph1);
568 $graph1->throw("add_interaction_from() needs a ". $class . " object, not a [".
569 ref($graph2). "] object") unless ($graph2->isa($class));
570 my (%common_ids,%common_nodes);
572 # get identifiers found in both graphs
573 for my $id ( (keys %{$graph1->[$GRAPH_ARRAY_INDEX]->{'_id_map'}}),
574 (keys %{$graph2->[$GRAPH_ARRAY_INDEX]->{'_id_map'}}) ) {
575 $common_ids{$id}++;
577 # get nodes corresponding to identifiers found in both graphs
578 for my $id (keys %common_ids) {
579 if ($common_ids{$id} == 2) {
580 if (defined $graph1->[$GRAPH_ARRAY_INDEX]->{'_id_map'}{$id} &&
581 defined $graph2->[$GRAPH_ARRAY_INDEX]->{'_id_map'}{$id} ) {
582 $common_nodes{$graph2->[$GRAPH_ARRAY_INDEX]->{'_id_map'}{$id}} =
583 $graph1->[$GRAPH_ARRAY_INDEX]->{'_id_map'}{$id};
587 # get all the edges in $graph2, if both nodes for a given edge are in
588 # $graph1 then interactions can be copied, unless it's already in $graph1
589 my @edges = $graph2->edges;
590 for my $edgeref (@edges) {
591 if (defined $common_nodes{$edgeref->[0]} &&
592 defined $common_nodes{$edgeref->[1]} ) {
593 my $attref2 = $graph2->get_edge_attributes($edgeref->[0],$edgeref->[1]); # nothing
594 if ($graph1->has_edge($common_nodes{$edgeref->[0]},
595 $common_nodes{$edgeref->[1]}) ) {
596 # interactions for the given pair are in both graphs...
597 my $attref1 = $graph1->get_edge_attributes($common_nodes{$edgeref->[0]},
598 $common_nodes{$edgeref->[1]});
599 for my $interxid (keys %$attref2) {
600 #...so check to see if their primary id's are the same or not
601 unless (defined $attref1->{$interxid}) {
602 $graph1->add_interaction(-nodes => [($common_nodes{$edgeref->[0]},
603 $common_nodes{$edgeref->[1]})],
604 -interaction => $attref2->{$interxid});
607 } else {
608 # a pair of nodes in $graph2 interact but don't interact in $graph1
609 for my $interxid (keys %$attref2) {
610 $graph1->add_interaction(-nodes => [($common_nodes{$edgeref->[0]},
611 $common_nodes{$edgeref->[1]})],
612 -interaction => $attref2->{$interxid});
619 =head2 subgraph
621 Name : subgraph
622 Purpose : Construct a subgraph of nodes from another network, including
623 all Interactions.
624 Usage : my $subgraph = $graph->subgraph(@nodes).
625 Returns : A subgraph composed of nodes, edges, and Interactions from the
626 original graph.
627 Arguments : A list of nodes.
629 =cut
631 sub subgraph {
632 my ($self,@nodes) = @_;
633 my $class = ref($self);
634 my $subgraph = new $class;
635 my @pairs = ();
637 $subgraph->add_node(@nodes);
639 # retrieve and add interacting pairs of nodes and Interactions
640 @pairs = $self->_all_pairs(@nodes) if ($#nodes > 0);
641 for my $pair (@pairs) {
642 if ( $self->has_edge(@$pair) ) {
643 my $ref = $self->get_edge_attributes(@$pair);
644 for my $id (keys %$ref) {
645 $subgraph->add_interaction(-nodes => $pair,
646 -interaction => $ref->{$id} );
650 # add isolated nodes that weren't found as interacting pairs, above
651 for my $node (@nodes) {
652 $subgraph->add_node($node) unless ($subgraph->has_node($node))
654 $subgraph;
657 =head2 get_ids_by_node
659 Name : get_ids_by_node
660 Purpose : Gets all ids for a node
661 Arguments: A Bio::SeqI object
662 Returns : A hash: Keys are db ids, values are identifiers
663 Usage : my %ids = $gr->get_ids_by_node($seqobj);
665 =cut
667 sub get_ids_by_node {
668 my %ids;
669 my $self = shift;
670 while (my $node = shift @_ ){
671 $node->throw("I need a Bio::Network::Node object, not a [" .ref($node) ."]")
672 unless ( $node->isa('Bio::Network::Node') );
673 ## If Bio::Seq get dbxref ids as well.
674 my @proteins = $node->proteins;
675 for my $protein (@proteins) {
676 my $ac = $protein->annotation();
677 for my $an ($ac->get_Annotations('dblink')) {
678 $ids{$an->database()} = $an->primary_id();
682 return %ids;
685 =head2 add_interaction
687 Name : add_interaction
688 Purpose : Adds an Interaction to a graph.
689 Usage : $gr->add_interaction(-interaction => $interx
690 -nodes => \@nodes );
691 Arguments : An Interaction object and a reference to an array holding
692 a pair of nodes
693 Returns :
694 Description : This is the method to use to add an interaction to a graph.
696 =cut
698 sub add_interaction {
699 my $self = shift;
700 my ($interx,$nodesref) = $self->_rearrange([qw(INTERACTION NODES)],@_);
701 my @nodes = @$nodesref;
702 #my $interxid = $interx->primary_id;
704 $self->throw("The add_edge method needs 2 nodes, not ". scalar @nodes .
705 " nodes") if ($#nodes != 1);
707 for my $node (@nodes) {
708 unless ( $node->isa("Bio::Network::Node") ) {
709 if ( $node->isa("Bio::Seq") ) {
710 # $self->warn("Node must be a Bio::Node object, not a [Bio::Seq]. " .
711 # "Will make a Node object from it.");
712 $node = Bio::Network::Node->new(-protein => [($node)]);
713 } else {
714 $self->throw("Cannot make an interaction using a [". ref($node) .
715 "].")
720 $self->add_edge($nodes[0], $nodes[1]);
721 $self->set_edge_attribute($nodes[0], $nodes[1], $interx->primary_id, $interx);
722 $self->add_id_to_interaction($interx->primary_id, $interx);
723 # Store the node names in the Interaction object
724 $interx->{_nodes} = $nodesref;
728 =head2 add_edge
730 Name : add_edge
731 Purpose :
732 Usage : $gr->add_edge(@nodes)
733 Arguments : A pair of nodes
734 Returns :
735 Description :
737 =cut
739 sub add_edge {
740 my ($self,@nodes) = @_;
742 $self->throw("The add_edge method needs 2 nodes, not ". scalar @nodes .
743 " nodes") if ($#nodes != 1);
745 for my $node (@nodes) {
746 $self->throw("Node must be a Bio::Network::Node object, not a [". ref($node) . "].")
747 unless ($node->isa("Bio::Network::Node"));
749 $self->SUPER::add_edge(@nodes);
752 =head2 add_vertex
754 Name : add_vertex
755 Purpose : Adds a node to a graph.
756 Usage : $gr->add_vertex($n)
757 Arguments : A Bio::Network::Node object
758 Returns :
759 Description :
761 =cut
763 sub add_vertex {
764 my ($self,$node) = @_;
765 $self->throw("Node must be a Node object, not a ["
766 . ref($node) . "]") unless ($node->isa("Bio::Network::Node"));
768 if ($self->has_node($node)) {
769 # $self->warn("Graph already has node with id " . $node->display_id
770 # . ", will not add it.");
771 return;
773 $self->SUPER::add_vertex($node);
776 =head2 add_node
778 Name : add_node
779 Purpose : Alias to add_vertex
780 Usage : $gr->add_node($node)
781 Arguments : A Bio::Network::Node object
782 Returns :
783 Description :
785 =cut
787 sub add_node {
788 my ($self,$node) = @_;
789 $self->add_vertex($node);
792 =head2 clustering_coefficient
794 Name : clustering_coefficient
795 Purpose : Determines the clustering coefficient of a node, a number
796 in range 0-1 indicating the extent to which the neighbors of
797 a node are interconnnected.
798 Arguments : A Node or a text identifier
799 Returns : The clustering coefficient. 0 is a valid result.
800 If the CC is not calculable ( if the node has <2 neighbors),
801 returns -1.
802 Usage : my $node = $gr->get_nodes_by_id('P12345');
803 my $cc = $gr->clustering_coefficient($node);
805 =cut
807 sub clustering_coefficient {
808 my ($self,$node) = @_;
810 $self->throw("[$node] is an incorrect parameter, not present in the graph")
811 unless defined($node);
812 $self->throw("[$node] is an incorrect parameter, not present in the graph")
813 unless ($node->isa("Bio::Network::Node"));
815 my @n = $self->neighbors($node);
816 my $n_count = scalar @n;
817 my $c = 0;
819 ## calculate cc if we can
820 if ($n_count >= 2){
821 for (my $i = 0; $i <= $#n; $i++ ) {
822 for (my $j = $i+1; $j <= $#n; $j++) {
823 if ($self->has_edge($n[$i], $n[$j])){
824 $c++;
828 $c = 2 * $c / ($n_count *($n_count - 1));
829 return $c; # can be 0 if unconnected.
830 }else{
831 return -1; # if value is not calculable
835 =head2 remove_nodes
837 Name : remove_nodes
838 Purpose : Alias to Graph::delete_vertices
839 Usage : $graph2 = $graph1->remove_nodes($node);
840 Arguments : A single Node object or a list of Node objects
841 Returns : A Graph with the given nodes deleted
842 Notes :
844 =cut
846 sub remove_nodes {
847 my ($self,@nodes) = @_;
848 my $g = $self->SUPER::delete_vertices(@nodes);
852 =head2 get_random_edge
854 Name : get_random_edge
855 Purpose : Alias to Graph::random_edge
856 Usage : $edge = $graph1->get_random_edge;
857 Arguments :
858 Returns : An Edge object
859 Notes :
861 =cut
863 sub get_random_edge {
864 my $self = shift;
865 my $e = $self->random_edge;
869 =head2 get_random_node
871 Name : get_random_node
872 Purpose : Alias to Graph::random_vertex
873 Usage : $node = $graph1->get_random_node;
874 Arguments :
875 Returns : A Node object
876 Notes :
878 =cut
880 sub get_random_node {
881 my $self = shift;
882 my $n = $self->random_vertex;
886 =head2 is_forest
888 Name : is_forest
889 Purpose : Determine if a graph is a forest (2 or more trees)
890 Usage : if ($gr->is_forest){ ..... }
891 Arguments : none
892 Returns : 1 or ""
894 =cut
896 sub is_forest {
897 my $self = shift;
898 return 1 if (!$self->is_connected && !$self->is_cyclic);
899 return "";
902 =head2 is_tree
904 Name : is_tree
905 Purpose : Determine if the graph is a tree
906 Usage : if ($gr->is_tree){ ..... }
907 Arguments : None
908 Returns : 1 or ""
910 =cut
912 sub is_tree {
913 my $self = shift;
914 return 1 if ($self->is_connected && !$self->is_cyclic);
915 return "";
918 =head2 is_empty
920 Name : is_empty
921 Purpose : Determine if graph has no nodes
922 Usage : if ($gr->is_empty){ ..... }
923 Arguments : None
924 Returns : 1 or ""
926 =cut
928 sub is_empty {
929 my $self = shift;
930 my @nodes = $self->vertices;
931 return 1 if (scalar @nodes == 0);
932 return "";
935 sub unconnected_nodes {
936 my $self = shift;
937 return $self->SUPER::isolated_vertices;
940 =head2 articulation_points
942 Name : articulation_points
943 Purpose : Find nodes in a graph that if removed will fragment
944 the graph into sub-graphs.
945 Usage : my @nodes = $gr->articulation_points
947 my $count = $gr->articulation_points
948 Arguments : None
949 Returns : An array or a count of the array of nodes that will fragment
950 the graph if deleted.
951 Notes : This method is currently broken due to bugs in Graph v. .69
952 and later
954 =cut
956 sub articulation_points {
957 my $self = shift;
958 my @nodes = $self->SUPER::articulation_points;
959 wantarray ? @nodes : scalar @nodes;
962 =head2 is_articulation_point
964 Name : is_articulation_point
965 Purpose : Determine if a given node is an articulation point or not.
966 Usage : if ($gr->is_articulation_point($node)) {....}
967 Arguments : A node (Sequence object)
968 Returns : 1 if node is an articulation point, 0 if it is not
969 Notes : This method is currently broken due to bugs in Graph v. .69
971 =cut
973 sub is_articulation_point {
974 my ($self,$node) = @_;
976 $self->throw("$node is an incorrect parameter, not present in the graph")
977 unless ( $node->isa("Bio::Network::Node") );
979 my @artic_points = $self->articulation_points();
980 grep /$node/,@artic_points ? return 1 : return 0;
983 =head2 nodes
985 Name : nodes
986 Purpose : Alias to Graph::vertices()
987 Arguments:
988 Returns : An integer
989 Usage : my $count = $graph->nodes;
991 =cut
993 sub nodes {
994 my $self = shift;
995 if (wantarray) {
996 my @ns = $self->vertices;
997 return @ns;
998 } else {
999 return scalar $self->vertices;
1003 =head2 has_node
1005 Name : has_node
1006 Purpose : Alias to Graph::has_vertex
1007 Arguments:
1008 Returns : True if the node exists
1009 Usage : if ( $graph->has_node($node) ){ ... }
1011 =cut
1013 sub has_node {
1014 my ($self,$node) = @_;
1015 return $self->has_vertex($node);
1019 =head2 interactions
1021 Name : interactions
1022 Purpose : Count the total number of Interactions in the network (an Edge can
1023 have one or more Interactions) or retrieve all the Interactions in
1024 the network as an array
1025 Usage : my $count = $gr->interactions or
1026 my @interx = $gr->interactions
1027 Arguments:
1028 Returns : A number or an array of Interactions
1029 Notes :
1031 =cut
1033 sub interactions {
1034 my $self = shift;
1035 if (wantarray) {
1036 my @interx;
1037 for my $id (keys %{$self->[$GRAPH_ARRAY_INDEX]->{'_interx_id_map'}}) {
1038 push @interx, $self->[$GRAPH_ARRAY_INDEX]->{'_interx_id_map'}->{$id};
1040 return @interx;
1041 } else {
1042 return scalar keys %{$self->[$GRAPH_ARRAY_INDEX]->{'_interx_id_map'}};
1046 =head2 nodes_by_id
1048 Name : nodes_by_id
1049 Purpose : Alias to get_nodes_by_id
1050 Notes : Deprecated
1052 =cut
1054 sub nodes_by_id {
1055 my $self = shift;
1056 my @ids = @_;
1057 return $self->get_nodes_by_id(@ids);
1060 =head2 edge_count
1062 Name : edge_count
1063 Purpose : Alias to edges()
1064 Notes : Deprecated, use edges()
1066 =cut
1068 sub edge_count {
1069 my $self = shift;
1070 return scalar $self->edges;
1073 =head2 neighbor_count
1075 Name : neighbor_count
1076 Purpose : Alias to Graph::neighbors
1077 Usage : my $count = $gr->neighbor_count($node)
1078 Arguments : A node
1079 Returns : An integer
1080 Notes : Deprecated
1082 =cut
1084 sub neighbor_count{
1085 my ($self,$node) = @_;
1086 return scalar $self->SUPER::neighbors($node);
1089 =head2 node_count
1091 Name : node_count
1092 Purpose : Alias to Graph::vertices()
1093 Notes : Deprecated, use nodes()
1095 =cut
1097 sub node_count {
1098 my $self = shift;
1099 return scalar $self->vertices;
1102 =head2 components
1104 Name : components
1105 Purpose : Alias to Graph::connected_components
1106 Usage : my @components = $gr->components
1107 Arguments :
1108 Returns :
1109 Notes : Deprecated
1111 =cut
1113 sub components {
1114 my $self = shift;
1115 return $self->connected_components;
1118 =head2 unconnected_nodes
1120 Name : unconnected_nodes
1121 Purpose : Alias to Graph::isolated_vertices
1122 Arguments : None
1123 Returns : An array of unconnected nodes
1124 Notes : Deprecated
1126 =cut
1128 =head2 _all_pairs
1130 Name : _all_pairs
1131 Purpose : Find unique set of all pairwise combinations
1132 Usage : my @pairs = $self->_all_pairs(@arr)
1133 Arguments : An array
1134 Returns : An array of array references, each array in the 2nd dimension
1135 is a 2-element array
1137 =cut
1139 sub _all_pairs {
1140 my ($self,@arr) = @_;
1141 my @pairs = ();
1142 $self->throw("Must pass an array with at least 2 elements to _all_pairs()")
1143 unless ($#arr > 0);
1144 for (my $x = 0 ; $x < $#arr ; $x++) {
1145 for (my $y = $x ; $y < $#arr ; $y++ ) {
1146 push @pairs, [($arr[$x],$arr[($y + 1)])];
1149 @pairs;
1152 =head2 _ids
1154 Name : _ids
1155 Purpose :
1156 Usage :
1157 Arguments :
1158 Returns :
1160 =cut
1162 sub _ids {
1163 my $self = shift;
1164 my @refs;
1165 while (my $id = shift) {
1166 push @refs, $self->[$GRAPH_ARRAY_INDEX]->{'_id_map'}->{$id};
1168 return @refs;
1173 __END__
1175 =head2 next_interaction
1177 Name : next_interaction
1178 Purpose : Retrieve Interactions using an edge
1179 Usage : while (my $interx = $edge->next_interaction){ ... }
1180 Returns : Interactions, one by one.
1181 Arguments :
1183 =cut
1185 sub next_interaction {
1190 =head2 next_edge
1192 Name : next_edge
1193 Purpose : Retrieve all edges
1194 Usage : while (my $edge = $graph->next_edge){ ... }
1195 Returns : Edges, one by one.
1196 Arguments :
1198 =cut
1200 sub next_edge {
1205 =head2 next_node
1207 Name : next_node
1208 Purpose : Retrieve all nodes
1209 Usage : while (my $node = $graph->next_node){ ... }
1210 Returns : Nodes, one by one.
1211 Arguments :
1213 =cut
1215 sub next_node {