test: dot files are no longer supported
[rangi.git] / rangi.c
blob4f8cf1d8db0ace943e3a817ae1d7a9ef5a6078d7
1 /*
2 * RANGI, a list-colored graph motif finding program
4 * Copyright (C) 2012 Ali Gholami Rudi <ali at rudi dot ir>
6 * This program is released under the modified BSD license.
7 */
8 #include <signal.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <pthread.h>
14 #include <sys/time.h>
15 #include "rangi.h"
17 #define MIN(a, b) ((a) < (b) ? (a) : (b))
19 /* N[i] and C[i] lists are terminated with a -1 */
20 static int N[NNODES][NNODES]; /* adjacency list */
21 static int V[NNODES][NNODES]; /* adjacency matrix */
22 static int C[NNODES][NCOLORS]; /* vertex color list */
23 static int D[NNODES]; /* degree of vertices */
24 static int S[NNODES]; /* node indices, sorted by degree */
25 static int F[NCOLORS]; /* color frequency */
27 static int showweights; /* print weights too */
28 static int usekavosh; /* use kavosh instead of fanmod */
29 static int doheur = 1; /* enable heuristics */
30 static int matchingtest;
32 static int readgraph(void);
33 static void readdot(void);
34 static int maxcolors(int *nodes);
36 /* see if the vertices have at least the same number of colors */
37 static int feasible(int *nodes, int rem)
39 int c[NCOLORS] = {0};
40 int nc = 0; /* nodes */
41 int i, j;
42 if (matchingtest) {
43 for (i = 0; nodes[i] >= 0; i++)
45 return maxcolors(nodes) == i;
47 for (i = 0; nodes[i] >= 0; i++) {
48 for (j = 0; C[nodes[i]][j] >= 0; j++) {
49 if (!c[C[nodes[i]][j]]) {
50 c[C[nodes[i]][j]] = 1;
51 nc++;
54 if (i + 1 > nc)
55 return 0;
57 return !nsub_finished(i + rem);
60 /* the maximum size of colors that can be assigned to vertices */
61 static int maxcolors(int *nodes)
63 int *adj[MNODES] = {NULL};
64 int i;
65 for (i = 0; nodes[i] >= 0; i++)
66 adj[i] = C[nodes[i]];
67 return matching(adj, i, NCOLORS);
70 /* does adding a vertex create a larger motif? */
71 static int extend(int *nodes)
73 int map[MNODES] = {0};
74 int n = 0;
75 int i, j, v, matched;
76 while (nodes[n] >= 0)
77 n++;
78 for (i = 0; i < n; i++)
79 map[nodes[i]] = 1;
80 nodes[n + 1] = -1;
81 for (i = 0; i < n; i++) {
82 for (j = 0; N[nodes[i]][j] >= 0; j++) {
83 v = N[nodes[i]][j];
84 if (map[v])
85 continue;
86 map[v] = 1;
87 nodes[n] = v;
88 matched = maxcolors(nodes);
89 nodes[n] = -1;
90 if (matched == n + 1)
91 return 1;
94 return 0;
97 /* found a candidate motif */
98 static int subgraph(int *nodes)
100 int n = 0;
101 while (nodes[n] >= 0)
102 n++;
103 if (maxcolors(nodes) != n)
104 return 0;
105 found(nodes);
106 if (doheur && extend(nodes))
107 nsub_detected(n + 1);
108 return 1;
111 /* the number of colors with with lower frequency than c */
112 static int colorrank(int c)
114 int rank = 0;
115 int i;
116 if (!F[c])
117 return 1000;
118 for (i = 0; i < NCOLORS; i++)
119 if (F[i] && (F[i] < F[c] || (F[i] == F[c] && i < c)))
120 rank++;
121 return rank;
124 /* the total number of colors that occur in the graph */
125 static int ncolors(void)
127 int n = 0;
128 int i;
129 for (i = 0; i < NCOLORS; i++)
130 if (F[i])
131 n++;
132 return n;
135 /* is u a good choice for tree root? */
136 static int goodroot(int u, int crank)
138 int i;
139 /* ignore vertices with no edges */
140 if (N[u][0] < 0)
141 return 0;
142 /* ignore vertices with frequent colors */
143 for (i = 0; C[u][i] >= 0; i++)
144 if (colorrank(C[u][i]) <= crank)
145 return 1;
146 return 0;
149 static int kavosh_enum(int v, int *used, int nsub);
150 static int fanmod_enum(int v, int *used, int nsub);
152 /* generate all subgraphs of size nsub with all possible root vertices */
153 static int gensubs(int nsub)
155 int used[NNODES] = {0};
156 int i;
157 int found = 0;
158 int crank = ncolors() - nsub; /* the maximum color rank of the root */
159 int (*x_enum)(int, int *, int) = usekavosh ? kavosh_enum : fanmod_enum;
160 for (i = 0; i < NNODES; i++) {
161 if (!doheur || goodroot(S[i], crank)) {
162 used[S[i]] = 1;
163 if (!ownroot(nsub, i))
164 found += x_enum(S[i], used, nsub);
167 return found;
170 static int degcmp(const void *i1, const void *i2)
172 return D[*(int *) i1] - D[*(int *) i2];
175 static void init(void)
177 int i, j;
178 /* remove color-less vertices */
179 for (i = 0; i < NNODES; i++)
180 if (C[i][0] < 0)
181 N[i][0] = -1;
182 /* calculate D[i] */
183 for (i = 0; i < NNODES; i++)
184 for (j = 0; N[i][j] >= 0; j++)
185 D[i]++;
186 /* initialize S as the sorted list of vertices based on degrees */
187 for (i = 0; i < NNODES; i++)
188 S[i] = i;
189 if (doheur)
190 qsort(S, NNODES, sizeof(S[0]), degcmp);
193 static void *thread_main(void *v)
195 int nsub = nsub_next(0);
196 do {
197 gensubs(nsub);
198 } while ((nsub = nsub_next(nsub)) > 0);
199 return NULL;
202 static void printhelp(void)
204 printf("list-colored graph motif finding program\n\n");
205 printf("options:\n");
206 printf("\t-s size \tsize of the motif to search for\n");
207 printf("\t-n count \tthe number of threads to create\n");
208 printf("\t-w \tprint the weight of each motif too\n");
209 printf("\t-d secs \tmotif finding deadline\n");
210 printf("\t-1 \tterminate after finding the first motif\n");
211 printf("\t-k \tuse kavosh instead of fanmod for enumeration\n");
212 printf("\t-e \texhaustive search; disable heuristics\n");
213 printf("\t-m \tuse matching to discover infeasible subgraphs\n");
216 static void stopit(int i)
218 fprintf(stderr, "rangi: timeout\n");
219 exit(1);
222 static void printall(void);
224 int main(int argc, char *argv[])
226 char *nsub_spec = NULL;
227 int i;
228 int one = 0;
229 int nthreads = 1;
230 int nsub_beg = 2;
231 int nsub_end = NCOLORS;
232 int nsub_dir = 1;
233 for (i = 1; i < argc; i++) {
234 switch (argv[i][0] == '-' ? argv[i][1] : 'h') {
235 case 's':
236 nsub_spec = argv[i][2] ? argv[i] + 2 : argv[++i];
237 break;
238 case 'n':
239 nthreads = atoi(argv[i][2] ? argv[i] + 2 : argv[++i]);
240 break;
241 case 'd':
242 alarm(atoi(argv[i][2] ? argv[i] + 2 : argv[++i]));
243 break;
244 case '1':
245 one = 1;
246 break;
247 case 'w':
248 showweights = 1;
249 break;
250 case 'k':
251 usekavosh = 1;
252 break;
253 case 'e':
254 doheur = 0;
255 break;
256 case 'm':
257 matchingtest = 1;
258 break;
259 default:
260 printhelp();
261 return 0;
264 signal(SIGALRM, stopit);
265 if (readgraph())
266 readdot();
267 init();
268 nsub_end = ncolors();
269 if (nsub_spec) {
270 int c = nsub_spec[strlen(nsub_spec) - 1];
271 int n = atoi(nsub_spec);
272 if (c == '+')
273 nsub_beg = n;
274 if (c == '-')
275 nsub_end = n;
276 if (c != '-' && c != '+')
277 nsub_beg = nsub_end = n;
278 nsub_dir = c == '-' ? -1 : 1;
280 proc_init(nsub_beg, nsub_end, nsub_dir, one, doheur);
281 /* generate subgraphs */
282 if (nthreads == 1) {
283 thread_main(NULL);
284 } else {
285 pthread_t threads[NTHREADS];
286 for (i = 0; i < nthreads; i++)
287 pthread_create(&threads[i], NULL, thread_main, NULL);
288 for (i = 0; i < nthreads; i++)
289 pthread_join(threads[i], NULL);
291 printall();
292 return 0;
296 /* reading the input graph */
298 static char names[NNODES][NLEN]; /* vertex names */
299 static int nnodes = 1;
301 static int nodeid(char *name)
303 int i;
304 for (i = 0; i < nnodes; i++)
305 if (!strcmp(name, names[i]))
306 return i;
307 strcpy(names[nnodes], name);
308 return nnodes++;
312 * Read a graph in this format:
313 * + the number of vertices
314 * + the name and the list of colors of each vertex, terminated with -1
315 * + edges of graphs and their weights
317 * Example:
318 * 3 # number vertices
319 * v1 2 4 -1 # the name and the colors of the first vertex
320 * v2 1 -1 # the name and the colors of the second vertex
321 * v3 1 3 4 -1 # the name and the colors of the third vertex
322 * v1 v2 102 # first edge with weight 102 between v1 and v2
323 * v1 v3 223 # second edge with weight 223 between v1 and v3
325 static int readgraph(void)
327 int nn[NNODES] = {0};
328 int i, n, c;
329 char u_[NLEN], v_[NLEN];
330 int u, v, w;
331 if (scanf("%d", &n) != 1)
332 return 1;
333 for (i = 1; i <= n; i++) {
334 int nc = 0;
335 scanf("%s", u_);
336 u = nodeid(u_);
337 while (scanf("%d", &c) == 1 && c != -1) {
338 if (c >= NCOLORS) {
339 fprintf(stderr, "rangi: increase NCOLORS!\n");
340 exit(1);
342 C[u][nc++] = c;
343 F[c]++;
345 C[u][nc] = -1;
347 while (scanf("%s %s %d", u_, v_, &w) == 3) {
348 u = nodeid(u_);
349 v = nodeid(v_);
350 N[u][nn[u]++] = v;
351 N[v][nn[v]++] = u;
352 V[u][v] = w;
353 V[v][u] = w;
355 for (i = 0; i < NNODES; i++)
356 N[i][nn[i]] = -1;
357 return 0;
360 /* reads a graph in graphviz format */
361 static void readdot(void)
363 int nn[NNODES] = {0};
364 int u, v, w;
365 long c;
366 int i;
367 scanf("graph G {");
368 while (1) {
369 if (scanf("%d", &u) != 1)
370 break;
371 if (getchar() == '-') {
372 if (scanf("-%d[weight=\"%d\"];", &v, &w) != 2)
373 break;
374 N[u][nn[u]++] = v;
375 N[v][nn[v]++] = u;
376 V[u][v] = w;
377 V[v][u] = w;
378 } else {
379 int nc = 0;
380 if (scanf("colorlist=\"%lu\"];", &c) != 1)
381 break;
382 for (i = 0; i < sizeof(long) * 8; i++)
383 if (c & (1ul << i)) {
384 C[u][nc++] = i;
385 F[i]++;
387 C[u][nc] = -1;
390 for (i = 0; i < NNODES; i++)
391 N[i][nn[i]] = -1;
395 /* weighing and printing motifs */
397 /* the minimum weight of the edges */
398 static int weight_min(int *nodes, int n)
400 int w = 0x0fffffff;
401 int i, j;
402 for (i = 0; i < n; i++) {
403 for (j = i + 1; j < n; j++) {
404 int e = V[nodes[i]][nodes[j]];
405 if (e > 0 && e < w)
406 w = e;
409 return w;
412 /* the sum of the weight of the edges */
413 static int weight_sum(int *nodes, int n)
415 int w = 0;
416 int i, j;
417 for (i = 0; i < n; i++)
418 for (j = i + 1; j < n; j++)
419 w += V[nodes[i]][nodes[j]];
420 return w;
423 /* the number of edges */
424 static int weight_num(int *nodes, int n)
426 int w = 0;
427 int i, j;
428 for (i = 0; i < n; i++)
429 for (j = i + 1; j < n; j++)
430 if (V[nodes[i]][nodes[j]])
431 w++;
432 return w;
435 /* the weight of maximum spanning tree */
436 static int weight_mst(int *nodes, int n)
438 int nei[NCOLORS][NCOLORS] = {{0}}; /* adjacency list */
439 int nnei[NCOLORS] = {0}; /* number of neighbors */
440 int wei[NCOLORS][NCOLORS] = {{0}}; /* weight of edges */
441 int sel[NCOLORS][NCOLORS] = {{0}}; /* selected edges in mst */
442 int mark[NCOLORS]; /* component of vertices */
443 int w = 0;
444 int oldmark, newmark;
445 int i, j, u, v;
446 /* create the induced subgraph of nodes */
447 for (i = 0; i < n; i++) {
448 for (j = 0; j < n; j++) {
449 wei[i][j] = V[nodes[i]][nodes[j]];
450 if (wei[i][j])
451 nei[i][nnei[i]++] = j;
454 /* each node has its own component at the beginning */
455 for (i = 0; i < n; i++)
456 mark[i] = i;
457 /* add n - 1 edges */
458 for (i = 0; i < n - 1; i++) {
459 int maxu = 0, maxv = 1;
460 int maxedge = 0;
461 /* finding the maximum edge */
462 for (u = 0; u < n; u++) {
463 for (j = 0; j < nnei[u]; j++) {
464 v = nei[u][j];
465 if (maxedge < wei[u][v] && !sel[u][v] &&
466 mark[u] != mark[v]) {
467 maxu = u;
468 maxv = v;
472 sel[maxu][maxv] = 1;
473 sel[maxv][maxu] = 1;
474 /* merging the components */
475 oldmark = mark[maxu];
476 newmark = mark[maxv];
477 for (u = 0; u < n; u++)
478 if (mark[u] == oldmark)
479 mark[u] = newmark;
481 /* calculating the weight of the spanning tree */
482 for (u = 0; u < n; u++) {
483 for (j = 0; j < nnei[u]; j++) {
484 v = nei[u][j];
485 if (u < v && sel[u][v])
486 w += wei[u][v];
489 return w;
492 /* print a motif and its weight */
493 static void printmotif(int *nodes)
495 int n = 0;
496 int j;
497 while (nodes[n] >= 0)
498 n++;
499 printf("%d\t", n);
500 for (j = 0; j < n; j++)
501 printf("%s ", names[nodes[j]]);
502 if (showweights) {
503 printf("\t%d %d %d %d",
504 weight_mst(nodes, n),
505 weight_sum(nodes, n),
506 weight_min(nodes, n),
507 weight_num(nodes, n));
509 printf("\n");
512 static void printall(void)
514 int *sols[NSOLS];
515 int n, i;
516 n = motifs(sols, NSOLS);
517 for (i = 0; i < n; i++)
518 printmotif(sols[i]);
522 /* the kavosh enumeration algorithm */
524 /* generate all C(k, n) ways of selecting k integers from {0, ..., n - 1} */
525 static int kavosh_comb(int k, int n, int *idx)
527 int i, j;
528 /* find the first element that could be moved right */
529 for (i = k - 1; i >= 0; i--)
530 if (idx[i] < n - (k - i - 1) - 1)
531 break;
532 if (i < 0)
533 return 1;
534 idx[i]++;
535 for (j = i + 1; j < k; j++)
536 idx[j] = idx[i] + (j - i);
537 return 0;
541 /* extend the given subgraph starting at the given depth */
542 static int kavosh_fill(int *nodes, int *prev, int *used, int rem)
544 int cur[NNODES]; /* vertices in this depth */
545 int idx[NNODES]; /* index of selected vertices in cur[] */
546 int *sel; /* inserted vertices into nodes */
547 int ncur = 0;
548 int ret = 0;
549 int i, j, k;
550 if (doheur && !feasible(nodes, rem))
551 return 0;
552 if (!rem)
553 return subgraph(nodes);
555 * put the vertices with the given depth into cur[] and mark
556 * them in used[]
558 for (i = 0; prev[i] >= 0; i++) {
559 int u = prev[i];
560 for (j = 0; N[u][j] >= 0; j++) {
561 int v = N[u][j];
562 if (!used[v] && N[v][0] >= 0) {
563 cur[ncur++] = v;
564 used[v] = 1;
568 cur[ncur] = -1;
569 /* where to insert new nodes */
570 sel = nodes;
571 while (*sel >= 0)
572 sel++;
573 /* try all subgraphs with k vertices at this depth */
574 for (k = 1; k <= MIN(ncur, rem); k++) {
575 for (i = 0; i < k; i++)
576 idx[i] = i;
577 sel[k] = -1;
578 /* enumerate all sel[] of size k from cur[] of size ncur */
579 do {
580 for (i = 0; i < k; i++)
581 sel[i] = cur[idx[i]];
582 ret += kavosh_fill(nodes, sel, used, rem - k);
583 } while (!kavosh_comb(k, ncur, idx));
585 for (i = 0; cur[i] >= 0; i++)
586 used[cur[i]] = 0;
587 *sel = -1;
588 return ret;
591 /* an optimized version of the Kavosh algorithm */
592 static int kavosh_enum(int v, int *used, int nsub)
594 int nodes[NNODES] = {v, -1};
595 nodes[0] = v;
596 return kavosh_fill(nodes, nodes, used, nsub - 1);
600 /* the fanmod enumeration algorithm */
602 /* insert the neighbors of w into the extension list */
603 static int fanmod_addext(int *sub_map, int *ext_map, int *ext, int *used, int w)
605 int i;
606 int n = 0;
607 for (i = 0; N[w][i] >= 0; i++) {
608 int u = N[w][i];
609 if (used[u] || sub_map[u] || ext_map[u])
610 continue;
611 ext_map[u] = 1;
612 ext[n++] = u;
614 ext[n] = -1;
615 return n;
618 /* remove the vertices added via fanmod_addext */
619 static void fanmod_delext(int *ext_map, int *ext)
621 int i;
622 for (i = 0; ext[i] >= 0; i++)
623 ext_map[ext[i]] = 0;
624 ext[0] = -1;
627 /* recursively add vertices to sub; see the comments in fanmod_enum() */
628 static int fanmod_fill(int *used, int *sub, int *sub_map,
629 int *ext, int *ext_map, int rem)
631 int n_sub = 0; /* number of entries in sub */
632 int n_ext = 0; /* number of entries in ext */
633 int found = 0;
634 int i;
635 if (doheur && !feasible(sub, rem))
636 return 0;
637 if (!rem)
638 return subgraph(sub);
639 while (sub[n_sub] >= 0)
640 n_sub++;
641 while (ext[n_ext] >= 0)
642 n_ext++;
643 for (i = 0; i < n_ext; i++) {
644 sub[n_sub] = ext[i];
645 sub_map[ext[i]] = 1;
646 sub[n_sub + 1] = -1;
647 fanmod_addext(sub_map, ext_map, ext + n_ext, used, ext[i]);
648 found += fanmod_fill(used, sub, sub_map, ext + i + 1, ext_map, rem - 1);
649 fanmod_delext(ext_map, ext + n_ext);
650 sub[n_sub] = -1;
651 sub_map[ext[i]] = 0;
653 return found;
656 /* an optimized version of the ESU algorithm */
657 static int fanmod_enum(int v, int *used, int nsub)
659 int sub[NNODES] = {v, -1}; /* the -1 terminated subgraph vertices */
660 int sub_map[NNODES] = {0}; /* sub_map[i] is one if i is in sub */
661 int ext[NNODES] = {-1}; /* the -1 terminated extension list */
662 int ext_map[NNODES] = {0}; /* ext_map[i] is one if i is in ext */
663 sub_map[v] = 1;
664 fanmod_addext(sub_map, ext_map, ext, used, v);
665 return fanmod_fill(used, sub, sub_map, ext, ext_map, nsub - 1);