comment on deprecation added.
[perlcyc.git] / bin / perlcyc_dump_compounds.pl
blobbc3bdcaf90376c6ea7b11711176789f5dbd40599
1 #!/usr/local/bin/perl -w
2 use strict;
3 use perlcyc;
4 use Data::Dumper;
7 ##modified to print out molecular weight as well. -peifenz 20060503
9 ##This is based on the script dump_compounds.pl. The script will pull out all compounds of all pathways. For each compound, it will print out common name, synonyms, compound hierarchy, chemical formula, SMILES formula, CAS links, all the reaction equations containing the compound, and all the pathways containing each of the reactions. -PeifenZ 8/15/2004
## Header row of the tab-separated report; one data row is printed later for
## every (compound, reaction, pathway) combination.
print "Compound_common_name\tCompound_synonyms\tMolecular_weight\tChemical_formula\tSmiles\tLinks\tEC\tReaction_equation\tPathway\n";

## Query object for the "ARA" database. The constructor is called with
## direct method syntax; the indirect form (new perlcyc ...) is ambiguous to
## parse and is disabled under "use v5.36".
my $cyc = perlcyc->new("ARA");

## Frame ids of every pathway in the database.
my @pathways = $cyc->all_pathways();

## compound frame id => list of pathway frame ids that contain the compound.
my %compoundsHash;
## Fills %compoundsHash by walking every pathway in @pathways and recording,
## for each compound the pathway contains, the id of that pathway.
sub prepareCompoundsHash {
    foreach my $pathway_id (@pathways) {
        my @member_ids = $cyc->compounds_of_pathway($pathway_id);
        push @{ $compoundsHash{$_} }, $pathway_id foreach @member_ids;
    }
}

## Build the compound index once, before any lookups are made.
prepareCompoundsHash();
26 ## Returns all the compounds (id) of all pathways
## Ids of every compound recorded in %compoundsHash, in hash (unspecified)
## order.
sub all_compounds {
    my @compound_ids = keys %compoundsHash;
    return @compound_ids;
}
32 ## Returns a list of pathways (id) containing a reaction
## Values of the "In-pathway" slot of the given reaction frame.
##   $reaction_id - frame id of the reaction to look up
sub pathways_of_reaction {
    my ($reaction_id) = @_;
    my @containing = $cyc->get_slot_values($reaction_id, "In-pathway");
    return @containing;
}
39 ## Returns a list of synonyms of a compound
## Values of the "Synonyms" slot of the given compound frame.
##   $compound_id - frame id of the compound to look up
sub synonyms_of_compound {
    my ($compound_id) = @_;
    return $cyc->get_slot_values($compound_id, "Synonyms");
}
46 ## Returns the common name of a frame
## Name string of a frame as reported by get_name_string, requested with the
## strip_html option switched on.
##   $frame_id - id of the frame (compound, reaction or pathway) to name
sub getCommonName {
    my ($frame_id) = @_;
    return $cyc->get_name_string($frame_id, strip_html => 1);
}
53 ## Returns a list of db links, e.g. ("CAS:57-5", "CAS:34-04-3")
## Formats each value of the compound's "DBLINKS" slot as "first:second",
## using the first two elements of each value (values are dereferenced as
## array references).
##   $cyc      - query object providing get_slot_values
##   $compound - frame id of the compound
## Returns the formatted strings in slot order; an empty list when the slot
## has no values.
sub getDbLinks {
    my ($cyc, $compound) = @_;
    return map { $_->[0] . ":" . $_->[1] }
           $cyc->get_slot_values($compound, "DBLINKS");
}
## Main report loop: prints one tab-separated row for every
## (compound, reaction, pathway) combination, in the column order of the
## header row.
for my $compound (all_compounds()) {
    ## not all compounds are frames, so we need to skip over those that aren't frames
    next unless $cyc->coercible_to_frame_p($compound);

    ## An empty slot may come back undefined; default to "" so the
    ## substitution and the join below do not warn under -w.
    my $mw = $cyc->get_slot_value($compound, "molecular-weight");
    $mw = "" unless defined $mw;
    ## Drop the "d0" marker from the weight (presumably a Lisp double-float
    ## suffix such as "180.156d0" -- TODO confirm).
    $mw =~ s/d0//g;

    my $smiles = $cyc->get_slot_value($compound, "SMILES");
    $smiles = "" unless defined $smiles;

    ## Each chemical-formula value is used as a pair; the two elements are
    ## concatenated and every pair is followed by a single space.
    my $formulaString = "";
    for my $f ($cyc->get_slot_values($compound, "chemical-formula")) {
        $formulaString .= "$$f[0]$$f[1] ";
    }

    my $DBlink = join "*", getDbLinks($cyc, $compound);

    ## Columns that depend only on the compound are fetched once here rather
    ## than once per printed row.
    my @compound_columns = (
        getCommonName($compound),
        join("*", synonyms_of_compound($compound)),
        $mw,
        $formulaString,
        $smiles,
        $DBlink,
    );

    for my $reaction ($cyc->reactions_of_compound($compound)) {
        ## Fall back to the reaction id when no EC number is recorded.
        my $rname = $cyc->get_slot_value($reaction, "EC-NUMBER");
        if (!$rname) { $rname = $reaction; }

        ## Columns that depend only on the reaction, fetched once per reaction.
        my @reaction_columns = ($rname, getCommonName($reaction));

        for my $pathway (pathways_of_reaction($reaction)) {
            print join("\t", @compound_columns,
                             @reaction_columns,
                             getCommonName($pathway));
            print "\n";
        }
    }
}