meval: count the complexes containing a motif
[rangi.git] / conv.c
bloba94d28af4cb36ef4ad3d7c23976fee002b827eb6
1 /*
2 * conv, for creating RANGI input files
4 * Copyright (C) 2012 Ali Gholami Rudi <ali at rudi dot ir>
6 * This program is released under the modified BSD license.
7 */
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include "tab.h"
13 #define NLEN 64 /* size of a name buffer, terminating NUL included */
14 #define NODES 20000 /* number of rows in the per-vertex arrays */
15 #define EDGES 400000 /* number of slots in the edge arrays */
16 #define THRESH 0.0000001 /* query-file records count only when their value is below this */
17 #define NCOLORS 512 /* number of rows in queries[] and columns in colors[] */
19 static char nodes[NODES][NLEN]; /* vertex names, indexed by vertex id */
20 static int nnodes; /* number of rows of nodes[] in use */
21 static int edges_beg[EDGES]; /* vertex id of the first endpoint of each edge */
22 static int edges_end[EDGES]; /* vertex id of the second endpoint of each edge */
23 static int edges_w[EDGES]; /* edge weights: input weight * 1000000, stored as int */
24 static int nedges; /* number of edges read so far */
25 static int colors[NODES][NCOLORS]; /* colors[v][q] is 1 when vertex v carries the color of query q */
26 static int mapping[NODES]; /* vertex mapping in the output; 0 means not mapped */
27 static int nmapped; /* number of vertices given a mapping */
29 static char queries[NCOLORS][NLEN]; /* query names, in command-line order */
30 static int nqueries; /* number of rows of queries[] in use */
32 static struct tab nodes_tab; /* name lookup over the rows of nodes[] */
33 static struct tab queries_tab; /* name lookup over the rows of queries[] */
35 /* find network protein */
36 static int id(char *s)
38 char *r = tab_get(&nodes_tab, s);
39 return r ? (r - nodes[0]) / NLEN : -1;
42 /* find protein id; insert it if not there */
43 static int id_def(char *s)
45 int idx = id(s);
46 if (idx < 0) {
47 idx = nnodes++;
48 strcpy(nodes[idx], s);
49 tab_add(&nodes_tab, nodes[idx]);
51 return idx;
54 /* find query id */
55 static int qid(char *s)
57 char *r = tab_get(&queries_tab, s);
58 return r ? (r - queries[0]) / NLEN : -1;
61 static void readnetwork(FILE *fin)
63 char s1[NLEN], s2[NLEN];
64 double w;
65 while (fscanf(fin, "%s %lf %s", s1, &w, s2) == 3) {
66 edges_beg[nedges] = id_def(s1);
67 edges_end[nedges] = id_def(s2);
68 edges_w[nedges] = w * 1000000;
69 nedges++;
73 static void queryinteractions(char *path)
75 char s1[NLEN], s2[NLEN];
76 double p;
77 FILE *qin = fopen(path, "r");
78 if (!qin) {
79 fprintf(stderr, "Failed to open query intraction file <%s>\n", path);
80 return;
82 while (fscanf(qin, "%s %s %lf", s1, s2, &p) == 3) {
83 if (p < THRESH) {
84 if (qid(s1) >= 0 && id(s2) >= 0)
85 colors[id(s2)][qid(s1)] = 1;
86 if (qid(s2) >= 0 && id(s1) >= 0)
87 colors[id(s1)][qid(s2)] = 1;
90 fclose(qin);
93 /* handle queries from the same network */
94 static void queryself(void)
96 int i;
97 for (i = 0; i < nqueries; i++)
98 if (id(queries[i]) < 0)
99 fprintf(stderr, "Invalid query protein: %s\n", queries[i]);
100 for (i = 0; i < nedges; i++) {
101 if (qid(nodes[edges_beg[i]]) >= 0)
102 colors[edges_end[i]][qid(nodes[edges_beg[i]])] = 1;
103 if (qid(nodes[edges_end[i]]) >= 0)
104 colors[edges_beg[i]][qid(nodes[edges_end[i]])] = 1;
108 /* assign a new number to non-isolated vertices */
109 static void mapnodes(void)
111 int ncolors[NODES] = {0};
112 int i, j;
113 for (i = 0; i < nnodes; i++)
114 for (j = 0; j < NCOLORS; j++)
115 if (colors[i][j])
116 ncolors[i]++;
117 for (i = 0; i < nnodes; i++)
118 if (ncolors[i] && qid(nodes[i]) < 0)
119 mapping[i] = ++nmapped;
122 static void output(void)
124 int i, j;
125 printf("%d\n", nmapped);
126 for (i = 0; i < nnodes; i++) {
127 if (mapping[i]) {
128 printf("%s ", nodes[i]);
129 for (j = 0; j < NCOLORS; j++)
130 if (colors[i][j])
131 printf("%d ", j);
132 printf("-1\n");
135 for (i = 0; i < nedges; i++)
136 if (mapping[edges_beg[i]] && mapping[edges_end[i]])
137 printf("%s %s %d\n", nodes[edges_beg[i]],
138 nodes[edges_end[i]],
139 edges_w[i]);
142 static void output_dot(void)
144 int i, j;
145 printf("graph G {\n");
146 for (i = 0; i < nnodes; i++) {
147 if (mapping[i]) {
148 unsigned long c = 0;
149 for (j = 0; j < NCOLORS; j++)
150 if (colors[i][j])
151 c |= (1ul << j);
152 printf("%d[colorlist=\"%lu\"];\n",
153 mapping[i], c);
156 for (i = 0; i < nedges; i++)
157 if (mapping[edges_beg[i]] && mapping[edges_end[i]])
158 printf("%d--%d[weight=\"%d\"];\n",
159 mapping[edges_beg[i]],
160 mapping[edges_end[i]],
161 edges_w[i]);
162 printf("}\n");
/* write the usage text to stdout and exit with status 0 */
static void printhelp(void)
{
	fputs("usage: conv -q qfile PROT1 PROT2 ... <network >testdata\n\n", stdout);
	fputs("options:\n", stdout);
	fputs("\t-q qfile \tthe query blast file\n", stdout);
	fputs("\t-g \tcreate a graph in graphviz-format\n", stdout);
	exit(0);
}
174 int main(int argc, char *argv[])
176 int i;
177 int dot = 0;
178 char *qpath = NULL;
179 for (i = 1; i < argc; i++) {
180 if (argv[i][0] != '-') {
181 int idx = nqueries++;
182 strcpy(queries[idx], argv[i]);
183 if (!tab_get(&queries_tab, queries[idx]))
184 tab_add(&queries_tab, queries[idx]);
186 if (!strcmp("-g", argv[i]))
187 dot = 1;
188 if (!strcmp("-q", argv[i]))
189 qpath = argv[++i];
190 if (!strcmp("-h", argv[i]))
191 printhelp();
193 readnetwork(stdin);
194 if (qpath)
195 queryinteractions(qpath);
196 else
197 queryself();
198 mapnodes();
199 if (dot)
200 output_dot();
201 else
202 output();
203 return 0;