README: mentioning other make targets seem unnecessary
[rangi.git] / rangi.c
blob7fc82358bc12b49ac9ecc2ea3d0fb52b77654f6d
1 /*
2 * RANGI, a list-colored graph motif finding program
4 * Copyright (C) 2012 Ali Gholami Rudi <ali at rudi dot ir>
6 * This program is released under the modified BSD license.
7 */
8 #include <signal.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <pthread.h>
14 #include <sys/time.h>
15 #include "rangi.h"
17 #define MIN(a, b) ((a) < (b) ? (a) : (b))
19 /* N[i] and C[i] lists are terminated with a -1 */
20 static int N[NNODES][NNODES]; /* adjacency list */
21 static int V[NNODES][NNODES]; /* adjacency matrix */
22 static int C[NNODES][NCOLORS]; /* vertex color list */
23 static int D[NNODES]; /* degree of vertices */
24 static int S[NNODES]; /* node indices, sorted by degree */
25 static int F[NCOLORS]; /* color frequency */
27 static int showweights; /* print weights too */
28 static int usekavosh; /* use kavosh instead of fanmod */
29 static int doheur = 1; /* enable heuristics */
30 static int matchthresh = 6; /* the branching threshold for matching test */
31 static int verbose; /* print the motifs as they are reported */
33 static int readgraph(void);
34 static void readdot(void);
35 static int maxcolors(int *nodes);
36 static void printmotif(int *nodes);
38 /* perform a matching to see if the subgraph is feasible */
39 static int feasible_match(int *nodes)
41 int i = 0;
42 while (nodes[i] >= 0)
43 i++;
44 return maxcolors(nodes) == i;
47 /* see if the vertices have at least the same number of colors */
48 static int feasible(int *nodes, int rem)
50 int c[NCOLORS] = {0};
51 int nc = 0; /* nodes */
52 int i, j;
53 for (i = 0; nodes[i] >= 0; i++) {
54 for (j = 0; C[nodes[i]][j] >= 0; j++) {
55 if (!c[C[nodes[i]][j]]) {
56 c[C[nodes[i]][j]] = 1;
57 nc++;
60 if (i + 1 > nc)
61 return 0;
63 return !nsub_finished(i + rem);
66 /* find the maximum number of colors that can be mapped to nodes */
67 static int maxcolors(int *nodes)
69 int *adj[MNODES] = {NULL};
70 int i;
71 for (i = 0; nodes[i] >= 0; i++)
72 adj[i] = C[nodes[i]];
73 return matching(adj, i, NCOLORS);
76 /* does adding a vertex create a larger motif? */
77 static int extend(int *nodes)
79 int map[MNODES] = {0};
80 int n = 0;
81 int i, j, v, matched;
82 while (nodes[n] >= 0)
83 n++;
84 for (i = 0; i < n; i++)
85 map[nodes[i]] = 1;
86 nodes[n + 1] = -1;
87 for (i = 0; i < n; i++) {
88 for (j = 0; N[nodes[i]][j] >= 0; j++) {
89 v = N[nodes[i]][j];
90 if (map[v])
91 continue;
92 map[v] = 1;
93 nodes[n] = v;
94 matched = maxcolors(nodes);
95 nodes[n] = -1;
96 if (matched == n + 1)
97 return 1;
100 return 0;
103 /* found a candidate motif */
104 static int subgraph(int *nodes)
106 int n = 0;
107 while (nodes[n] >= 0)
108 n++;
109 if (maxcolors(nodes) != n)
110 return 0;
111 found(nodes);
112 if (verbose)
113 printmotif(nodes);
114 if (doheur && extend(nodes))
115 nsub_detected(n + 1);
116 return 1;
119 /* the number of colors with with lower frequency than c */
120 static int colorrank(int c)
122 int rank = 0;
123 int i;
124 if (!F[c])
125 return 1000;
126 for (i = 0; i < NCOLORS; i++)
127 if (F[i] && (F[i] < F[c] || (F[i] == F[c] && i < c)))
128 rank++;
129 return rank;
132 /* the total number of colors that occur in the graph */
133 static int ncolors(void)
135 int n = 0;
136 int i;
137 for (i = 0; i < NCOLORS; i++)
138 if (F[i])
139 n++;
140 return n;
143 /* is u a good choice for tree root? */
144 static int goodroot(int u, int crank)
146 int i;
147 /* ignore vertices with no edges */
148 if (N[u][0] < 0)
149 return 0;
150 /* ignore vertices with frequent colors */
151 for (i = 0; C[u][i] >= 0; i++)
152 if (colorrank(C[u][i]) <= crank)
153 return 1;
154 return 0;
157 static int kavosh_enum(int v, int *used, int nsub);
158 static int fanmod_enum(int v, int *used, int nsub);
159 static int reachable(int v, int *used, int nsub);
161 /* generate all subgraphs of size nsub with all possible root vertices */
162 static int gensubs(int nsub)
164 int used[NNODES] = {0};
165 int i;
166 int found = 0;
167 int crank = ncolors() - nsub; /* the maximum color rank of the root */
168 int (*x_enum)(int, int *, int) = usekavosh ? kavosh_enum : fanmod_enum;
169 for (i = 0; i < NNODES; i++) {
170 if (!doheur || goodroot(S[i], crank)) {
171 used[S[i]] = 1;
172 if (!ownroot(nsub, i))
173 if (!doheur || reachable(S[i], used, nsub))
174 found += x_enum(S[i], used, nsub);
177 return found;
180 static int degcmp(const void *i1, const void *i2)
182 return D[*(int *) i1] - D[*(int *) i2];
185 static void init(void)
187 int i, j;
188 /* remove color-less vertices */
189 for (i = 0; i < NNODES; i++)
190 if (C[i][0] < 0)
191 N[i][0] = -1;
192 /* calculate D[i] */
193 for (i = 0; i < NNODES; i++)
194 for (j = 0; N[i][j] >= 0; j++)
195 D[i]++;
196 /* initialize S as the sorted list of vertices based on degrees */
197 for (i = 0; i < NNODES; i++)
198 S[i] = i;
199 if (doheur)
200 qsort(S, NNODES, sizeof(S[0]), degcmp);
203 static void *thread_main(void *v)
205 int nsub = nsub_next(0);
206 do {
207 gensubs(nsub);
208 } while ((nsub = nsub_next(nsub)) > 0);
209 return NULL;
212 static void printhelp(void)
214 printf("RANGI: a list-colored graph motif finding program\n\n");
215 printf("options:\n");
216 printf("\t-s size \tsize of the motif to search for\n");
217 printf("\t-n count \tthe number of threads to create\n");
218 printf("\t-w \tprint the weight of each motif too\n");
219 printf("\t-d secs \tmotif finding deadline\n");
220 printf("\t-1 \tterminate after finding the first motif\n");
221 printf("\t-k \tuse kavosh instead of fanmod for enumeration\n");
222 printf("\t-e \texhaustive search; disable heuristics\n");
223 printf("\t-m thresh\tminimum branches to enable matching test (%d)\n",
224 matchthresh);
225 printf("\t-v \tprint the motifs as they are detected\n");
228 static void stopit(int i)
230 fprintf(stderr, "rangi: timeout\n");
231 exit(1);
234 static void printall(void);
236 int main(int argc, char *argv[])
238 char *nsub_spec = NULL;
239 int i;
240 int one = 0;
241 int nthreads = 1;
242 int nsub_beg = 2;
243 int nsub_end = NCOLORS;
244 int nsub_dir = 1;
245 for (i = 1; i < argc; i++) {
246 switch (argv[i][0] == '-' ? argv[i][1] : 'h') {
247 case 's':
248 nsub_spec = argv[i][2] ? argv[i] + 2 : argv[++i];
249 break;
250 case 'n':
251 nthreads = atoi(argv[i][2] ? argv[i] + 2 : argv[++i]);
252 break;
253 case 'd':
254 alarm(atoi(argv[i][2] ? argv[i] + 2 : argv[++i]));
255 break;
256 case '1':
257 one = 1;
258 break;
259 case 'w':
260 showweights = 1;
261 break;
262 case 'k':
263 usekavosh = 1;
264 break;
265 case 'e':
266 doheur = 0;
267 break;
268 case 'm':
269 matchthresh = atoi(argv[i][2] ? argv[i] + 2 : argv[++i]);
270 break;
271 case 'v':
272 verbose = 1;
273 break;
274 default:
275 printhelp();
276 return 0;
279 signal(SIGALRM, stopit);
280 if (readgraph())
281 readdot();
282 init();
283 nsub_end = ncolors();
284 if (nsub_spec) {
285 int c = nsub_spec[strlen(nsub_spec) - 1];
286 int n = atoi(nsub_spec);
287 if (c == '+')
288 nsub_beg = n;
289 if (c == '-')
290 nsub_end = n;
291 if (c != '-' && c != '+')
292 nsub_beg = nsub_end = n;
293 nsub_dir = c == '-' ? -1 : 1;
295 proc_init(nsub_beg, nsub_end, nsub_dir, one, doheur);
296 /* generate subgraphs */
297 if (nthreads == 1) {
298 thread_main(NULL);
299 } else {
300 pthread_t threads[NTHREADS];
301 for (i = 0; i < nthreads; i++)
302 pthread_create(&threads[i], NULL, thread_main, NULL);
303 for (i = 0; i < nthreads; i++)
304 pthread_join(threads[i], NULL);
306 if (!verbose)
307 printall();
308 return 0;
312 /* reading the input graph */
314 static char names[NNODES][NLEN]; /* vertex names */
315 static int nnodes = 1;
317 static int nodeid(char *name)
319 int i;
320 for (i = 0; i < nnodes; i++)
321 if (!strcmp(name, names[i]))
322 return i;
323 strcpy(names[nnodes], name);
324 return nnodes++;
328 * Read a graph in this format:
329 * + the number of vertices
330 * + the name and the list of colors of each vertex, terminated with -1
331 * + edges of graphs and their weights
333 * Example:
334 * 3 # number vertices
335 * v1 2 4 -1 # the name and the colors of the first vertex
336 * v2 1 -1 # the name and the colors of the second vertex
337 * v3 1 3 4 -1 # the name and the colors of the third vertex
338 * v1 v2 102 # first edge with weight 102 between v1 and v2
339 * v1 v3 223 # second edge with weight 223 between v1 and v3
341 static int readgraph(void)
343 int nn[NNODES] = {0};
344 int i, n, c;
345 char u_[NLEN], v_[NLEN];
346 int u, v, w;
347 if (scanf("%d", &n) != 1)
348 return 1;
349 for (i = 1; i <= n; i++) {
350 int nc = 0;
351 scanf("%s", u_);
352 u = nodeid(u_);
353 while (scanf("%d", &c) == 1 && c != -1) {
354 if (c >= NCOLORS) {
355 fprintf(stderr, "rangi: increase NCOLORS!\n");
356 exit(1);
358 C[u][nc++] = c;
359 F[c]++;
361 C[u][nc] = -1;
363 while (scanf("%s %s %d", u_, v_, &w) == 3) {
364 u = nodeid(u_);
365 v = nodeid(v_);
366 N[u][nn[u]++] = v;
367 N[v][nn[v]++] = u;
368 V[u][v] = w;
369 V[v][u] = w;
371 for (i = 0; i < NNODES; i++)
372 N[i][nn[i]] = -1;
373 return 0;
376 /* reads a graph in graphviz format */
377 static void readdot(void)
379 int nn[NNODES] = {0};
380 int u, v, w;
381 long c;
382 int i;
383 scanf("graph G {");
384 while (1) {
385 if (scanf("%d", &u) != 1)
386 break;
387 if (getchar() == '-') {
388 if (scanf("-%d[weight=\"%d\"];", &v, &w) != 2)
389 break;
390 N[u][nn[u]++] = v;
391 N[v][nn[v]++] = u;
392 V[u][v] = w;
393 V[v][u] = w;
394 } else {
395 int nc = 0;
396 if (scanf("colorlist=\"%lu\"];", &c) != 1)
397 break;
398 for (i = 0; i < sizeof(long) * 8; i++)
399 if (c & (1ul << i)) {
400 C[u][nc++] = i;
401 F[i]++;
403 C[u][nc] = -1;
406 for (i = 0; i < NNODES; i++)
407 N[i][nn[i]] = -1;
411 /* weighing and printing motifs */
413 /* the minimum weight of the edges */
414 static int weight_min(int *nodes, int n)
416 int w = 0x0fffffff;
417 int i, j;
418 for (i = 0; i < n; i++) {
419 for (j = i + 1; j < n; j++) {
420 int e = V[nodes[i]][nodes[j]];
421 if (e > 0 && e < w)
422 w = e;
425 return w;
428 /* the sum of the weight of the edges */
429 static int weight_sum(int *nodes, int n)
431 int w = 0;
432 int i, j;
433 for (i = 0; i < n; i++)
434 for (j = i + 1; j < n; j++)
435 w += V[nodes[i]][nodes[j]];
436 return w;
439 /* the number of edges */
440 static int weight_num(int *nodes, int n)
442 int w = 0;
443 int i, j;
444 for (i = 0; i < n; i++)
445 for (j = i + 1; j < n; j++)
446 if (V[nodes[i]][nodes[j]])
447 w++;
448 return w;
451 /* the weight of maximum spanning tree */
452 static int weight_mst(int *nodes, int n)
454 int nei[NCOLORS][NCOLORS] = {{0}}; /* adjacency list */
455 int nnei[NCOLORS] = {0}; /* number of neighbors */
456 int wei[NCOLORS][NCOLORS] = {{0}}; /* weight of edges */
457 int sel[NCOLORS][NCOLORS] = {{0}}; /* selected edges in mst */
458 int mark[NCOLORS]; /* component of vertices */
459 int w = 0;
460 int oldmark, newmark;
461 int i, j, u, v;
462 /* create the induced subgraph of nodes */
463 for (i = 0; i < n; i++) {
464 for (j = 0; j < n; j++) {
465 wei[i][j] = V[nodes[i]][nodes[j]];
466 if (wei[i][j])
467 nei[i][nnei[i]++] = j;
470 /* each node has its own component at the beginning */
471 for (i = 0; i < n; i++)
472 mark[i] = i;
473 /* add n - 1 edges */
474 for (i = 0; i < n - 1; i++) {
475 int maxu = 0, maxv = 1;
476 int maxedge = 0;
477 /* finding the maximum edge */
478 for (u = 0; u < n; u++) {
479 for (j = 0; j < nnei[u]; j++) {
480 v = nei[u][j];
481 if (maxedge < wei[u][v] && !sel[u][v] &&
482 mark[u] != mark[v]) {
483 maxu = u;
484 maxv = v;
488 sel[maxu][maxv] = 1;
489 sel[maxv][maxu] = 1;
490 /* merging the components */
491 oldmark = mark[maxu];
492 newmark = mark[maxv];
493 for (u = 0; u < n; u++)
494 if (mark[u] == oldmark)
495 mark[u] = newmark;
497 /* calculating the weight of the spanning tree */
498 for (u = 0; u < n; u++) {
499 for (j = 0; j < nnei[u]; j++) {
500 v = nei[u][j];
501 if (u < v && sel[u][v])
502 w += wei[u][v];
505 return w;
508 /* print a motif and its weight */
509 static void printmotif(int *nodes)
511 int n = 0;
512 int j;
513 while (nodes[n] >= 0)
514 n++;
515 printf("%d\t", n);
516 for (j = 0; j < n; j++)
517 printf("%s ", names[nodes[j]]);
518 if (showweights) {
519 printf("\t%d %d %d %d",
520 weight_mst(nodes, n),
521 weight_sum(nodes, n),
522 weight_min(nodes, n),
523 weight_num(nodes, n));
525 printf("\n");
528 static void printall(void)
530 int *sols[NSOLS];
531 int n, i;
532 n = motifs(sols, NSOLS);
533 for (i = 0; i < n; i++)
534 printmotif(sols[i]);
538 /* the kavosh enumeration algorithm */
540 /* generate all C(k, n) ways of selecting k integers from {0, ..., n - 1} */
541 static int kavosh_comb(int k, int n, int *idx)
543 int i, j;
544 /* find the first element that could be moved right */
545 for (i = k - 1; i >= 0; i--)
546 if (idx[i] < n - (k - i - 1) - 1)
547 break;
548 if (i < 0)
549 return 1;
550 idx[i]++;
551 for (j = i + 1; j < k; j++)
552 idx[j] = idx[i] + (j - i);
553 return 0;
556 /* extend the given subgraph starting at the given depth */
557 static int kavosh_fill(int *nodes, int *prev, int *used, int rem)
559 int cur[NNODES]; /* vertices in this depth */
560 int idx[NNODES]; /* index of selected vertices in cur[] */
561 int *sel; /* inserted vertices into nodes */
562 int ncur = 0;
563 int ret = 0;
564 int i, j, k;
565 if (doheur && !feasible(nodes, rem))
566 return 0;
567 if (!rem)
568 return subgraph(nodes);
570 * put the vertices with the given depth into cur[] and mark
571 * them in used[]
573 for (i = 0; prev[i] >= 0; i++) {
574 int u = prev[i];
575 for (j = 0; N[u][j] >= 0; j++) {
576 int v = N[u][j];
577 if (!used[v] && N[v][0] >= 0) {
578 cur[ncur++] = v;
579 used[v] = 1;
583 cur[ncur] = -1;
584 /* where to insert new nodes */
585 sel = nodes;
586 while (*sel >= 0)
587 sel++;
588 if (doheur && ncur * (ncur - 1) / 2 >= matchthresh && !feasible_match(nodes))
589 goto out;
590 /* try all subgraphs with k vertices at this depth */
591 for (k = 1; k <= MIN(ncur, rem); k++) {
592 for (i = 0; i < k; i++)
593 idx[i] = i;
594 sel[k] = -1;
595 /* enumerate all sel[] of size k from cur[] of size ncur */
596 do {
597 for (i = 0; i < k; i++)
598 sel[i] = cur[idx[i]];
599 ret += kavosh_fill(nodes, sel, used, rem - k);
600 } while (!kavosh_comb(k, ncur, idx));
602 out:
603 for (i = 0; cur[i] >= 0; i++)
604 used[cur[i]] = 0;
605 *sel = -1;
606 return ret;
609 /* an optimized version of the Kavosh algorithm */
610 static int kavosh_enum(int v, int *used, int nsub)
612 int nodes[NNODES] = {v, -1};
613 nodes[0] = v;
614 return kavosh_fill(nodes, nodes, used, nsub - 1);
618 /* the fanmod enumeration algorithm */
620 /* insert the neighbors of w into the extension list */
621 static int fanmod_addext(int *sub_map, int *ext_map, int *ext, int *used, int w)
623 int i;
624 int n = 0;
625 for (i = 0; N[w][i] >= 0; i++) {
626 int u = N[w][i];
627 if (used[u] || sub_map[u] || ext_map[u])
628 continue;
629 ext_map[u] = 1;
630 ext[n++] = u;
632 ext[n] = -1;
633 return n;
636 /* remove the vertices added via fanmod_addext */
637 static void fanmod_delext(int *ext_map, int *ext)
639 int i;
640 for (i = 0; ext[i] >= 0; i++)
641 ext_map[ext[i]] = 0;
642 ext[0] = -1;
645 /* recursively add vertices to sub; see the comments in fanmod_enum() */
646 static int fanmod_fill(int *used, int *sub, int *sub_map,
647 int *ext, int *ext_map, int rem)
649 int n_sub = 0; /* number of entries in sub */
650 int n_ext = 0; /* number of entries in ext */
651 int found = 0;
652 int i;
653 if (doheur && !feasible(sub, rem))
654 return 0;
655 if (!rem)
656 return subgraph(sub);
657 while (sub[n_sub] >= 0)
658 n_sub++;
659 while (ext[n_ext] >= 0)
660 n_ext++;
661 if (doheur && n_ext >= matchthresh && !feasible_match(sub))
662 return 0;
663 for (i = 0; i < n_ext; i++) {
664 sub[n_sub] = ext[i];
665 sub_map[ext[i]] = 1;
666 sub[n_sub + 1] = -1;
667 fanmod_addext(sub_map, ext_map, ext + n_ext, used, ext[i]);
668 found += fanmod_fill(used, sub, sub_map, ext + i + 1, ext_map, rem - 1);
669 fanmod_delext(ext_map, ext + n_ext);
670 sub[n_sub] = -1;
671 sub_map[ext[i]] = 0;
673 return found;
676 /* an optimized version of the ESU algorithm */
677 static int fanmod_enum(int v, int *used, int nsub)
679 int sub[NNODES] = {v, -1}; /* the -1 terminated subgraph vertices */
680 int sub_map[NNODES] = {0}; /* sub_map[i] is one if i is in sub */
681 int ext[NNODES] = {-1}; /* the -1 terminated extension list */
682 int ext_map[NNODES] = {0}; /* ext_map[i] is one if i is in ext */
683 sub_map[v] = 1;
684 fanmod_addext(sub_map, ext_map, ext, used, v);
685 return fanmod_fill(used, sub, sub_map, ext, ext_map, nsub - 1);
689 /* count the number of reachable colors from a vertex */
690 static int reachable(int v, int *used, int nsub)
692 int mark[NNODES] = {0};
693 int clrs[NCOLORS] = {0};
694 int q[NNODES];
695 int qhead = 0;
696 int qtail = 1;
697 int nclrs = 0;
698 int i;
699 q[0] = v;
700 mark[v] = 1;
701 /* the BFS algorithm */
702 while (qhead < qtail) {
703 if (nclrs >= nsub && qtail >= nsub)
704 break;
705 v = q[qhead++];
706 /* counting the colors in v */
707 for (i = 0; C[v][i] >= 0; i++) {
708 if (!clrs[C[v][i]]) {
709 clrs[C[v][i]] = 1;
710 nclrs++;
713 if (mark[v] >= nsub)
714 continue;
715 /* adding neighbors of v to queue */
716 for (i = 0; N[v][i] >= 0; i++) {
717 int u = N[v][i];
718 if (!mark[u] && !used[u]) {
719 q[qtail++] = u;
720 mark[u] = mark[v] + 1;
724 /* the neighborhood has nsub colors and the component nsub vertices */
725 return nclrs >= nsub && qtail >= nsub;