3 # BioPerl module for Bio::Network::IO::dip_tab
5 # You may distribute this module under the same terms as perl itself
6 # POD documentation - main docs before the code
10 Bio::Network::IO::dip_tab - class for parsing interaction data in DIP
15 Do not use this module directly, use Bio::Network::IO. For example:
17 my $io = Bio::Network::IO->new(-format => 'dip_tab',
20 my $network = $io->next_network;
24 The Database of Interacting Proteins (DIP) is a protein interaction
25 database (see L<http://dip.doe-mbi.ucla.edu/dip/Main.cgi>).
26 The species-specific subsets of the DIP database are provided in
27 a simple, tab-delimited format. The tab-separated columns are:
41 The source or namespace of the optional id in columns 3 and 8 varies
42 from species to species, and optional ids are frequently absent.
46 The first version of this format prepended the identifier with a
49 DIP:4305E DIP:3048N PIR:B64526 SWP:P23487 GI:2313123 ...
51 The version as of 1/2006 has no database identifiers:
53 DIP:4305E DIP:3048N B64526 P23487 2313123 ...
55 This module parses both versions.
59 The naming system is analogous to the SeqIO system, although usually
60 next_network() will be called only once per file.
66 User feedback is an integral part of the evolution of this and other
67 Bioperl modules. Send your comments and suggestions preferably to one
68 of the Bioperl mailing lists. Your participation is much appreciated.
70 bioperl-l@bioperl.org - General discussion
71 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
75 Please direct usage questions or support issues to the mailing list:
77 I<bioperl-l@bioperl.org>
79 rather than to the module maintainer directly. Many experienced and
80 responsive experts will be able to look at the problem and quickly
81 address it. Please include a thorough description of the problem
82 with code and data examples if at all possible.
86 Report bugs to the Bioperl bug tracking system to help us keep track
87 of the bugs and their resolution. Bug reports can be submitted via the
90 http://bugzilla.open-bio.org/
94 Brian Osborne bosborne at alum.mit.edu
95 Richard Adams richard.adams@ed.ac.uk
99 package Bio
::Network
::IO
::dip_tab
;
101 use vars
qw(@ISA $FAC);
102 use Bio::Network::IO;
103 use Bio::Network::ProteinNet;
104 use Bio::Network::Node;
105 use Bio::Seq::SeqFactory;
106 use Bio::Annotation::DBLink;
107 use Bio::Annotation::Collection;
108 use Bio::Network::Interaction;
110 @ISA = qw(Bio::Network::IO Bio::Network::ProteinNet);
113 $FAC = Bio
::Seq
::SeqFactory
->new(-type
=> 'Bio::Seq::RichSeq');
119 Purpose : parses a DIP file and returns a Bio::Network::ProteinNet
121 Usage : my $g = $graph_io->next_network();
123 Returns : a Bio::Network::ProteinNet object
129 my $graph = Bio
::Network
::ProteinNet
->new(refvertexed
=> 1);
131 while (my $l = $self->_readline() ) {
133 ## get line, only gi and node_id always defined
134 my ($interx_id, $node_id1, $o1, $s1, $p1, $g1,
135 $node_id2, $o2, $s2, $p2, $g2, $score) = split '\t', $l;
136 last unless ($interx_id && $g2);
138 ## concatenate correct database name with id
139 ($g1,$g2) = $self->_fix_id("GI",$g1,$g2);
140 ($s1,$s2) = $self->_fix_id("SWP",$s1,$s2);
141 ($p1,$p2) = $self->_fix_id("PIR",$p1,$p2);
142 # ($node_id1,$node_id2) = $self->_fix_id("DIP",$node_id1,$node_id2);
144 ## skip if score is below threshold
145 if ($self->threshold && defined($score)) {
146 next unless $score >= $self->threshold;
149 ## build node object if it's a new node, use DIP id
152 unless ( $node1 = $graph->get_nodes_by_id($node_id1) ) {
153 my $acc = $s1 || $p1 || $g1;
154 my $ac = $self->_add_db_links($acc, $s1, $p1, $node_id1, $g1);
155 my $prot1 = $FAC->create(-accession_number
=> $acc,
160 $node1 = Bio
::Network
::Node
->new(-protein
=> [($prot1)]);
161 $graph->add_node($node1);
162 my @ids = ($g1, $p1, $s1, $node_id1);
163 $graph->add_id_to_node(\
@ids,$node1);
166 unless ( $node2 = $graph->get_nodes_by_id($node_id2) ) {
167 my $acc = $s2 || $p2 || $g2;
168 my $ac = $self->_add_db_links($acc, $s2, $p2, $node_id2, $g2);
169 my $prot2 = $FAC->create(-accession_number
=> $acc,
174 $node2 = Bio
::Network
::Node
->new(-protein
=> [($prot2)]);
175 $graph->add_node($node2);
176 my @ids = ($g2, $p2, $s2, $node_id2);
177 $graph->add_id_to_node(\
@ids,$node2);
180 ## create new Interaction object based on DIP id, weight
181 my $interx = Bio
::Network
::Interaction
->new(-weight
=> $score,
184 $graph->add_interaction(-interaction
=> $interx,
185 -nodes
=> [($node1,$node2)]);
186 $graph->add_id_to_interaction($interx_id,$interx);
194 Purpose : write graph out in dip format
195 Arguments: a Bio::Network::ProteinNet object
197 Usage : $out->write_network($gr);
202 my ($self, $gr) = @_;
203 if ( !$gr || !$gr->isa('Bio::Network::ProteinNet') ) {
204 $self->throw("I need a Bio::Network::ProteinNet, not a [".
208 # Need to have all ids as annotations with database ids as well,
209 # the idea is to be able to round trip, to write it in same way as
212 for my $ref ($gr->edges) {
213 my ($interx,$str,$weight);
215 my $atts = $gr->get_edge_attributes(@
$ref);
216 # there should be only one Interaction if the network is from DIP
217 for my $interx (keys %$atts) {
219 $str = $interx . "\t";
220 $weight = $atts->{$interx}->weight();
223 # add node ids to string
224 for my $node (@
$ref){
225 # print out nodes in dip_tab order
226 my %ids = $gr->get_ids_by_node($node); # need to modify this in graph()
227 # add second tab since we won't write out an optional id
228 $str .= "DIP:" . $ids{DIP
} . "\t\t";
229 for my $name ( qw(UniProt PIR GenBank) ) {
230 $str .= $ids{$name} if (defined $ids{$name});
235 # add weight if defined
236 $str .= $weight . "\t" if $weight;
246 Purpose : create DBLink annotations, add to an Annotation
248 Arguments: an array of ids
249 Returns : an Annotation::Collection object
258 my $ac = Bio
::Annotation
::Collection
->new();
262 $id =~ /^([^:]+):([^:]+)/;
263 my $an = Bio
::Annotation
::DBLink
->new(
266 $ac->add_Annotation('dblink', $an);
286 my $name = $self->_get_standard_name($str);
290 $id = $name . ":" . $1;