2 * RANGI, a list-colored graph motif finding program
4 * Copyright (C) 2012 Ali Gholami Rudi <ali at rudi dot ir>
6 * This program is released under the modified BSD license.
17 #define MIN(a, b) ((a) < (b) ? (a) : (b))
19 /* N[i] and C[i] lists are terminated with a -1 */
20 static int N
[NNODES
][NNODES
]; /* adjacency list */
21 static int V
[NNODES
][NNODES
]; /* adjacency matrix */
22 static int C
[NNODES
][NCOLORS
]; /* vertex color list */
23 static int D
[NNODES
]; /* degree of vertices */
24 static int S
[NNODES
]; /* node indices, sorted by degree */
25 static int F
[NCOLORS
]; /* color frequency */
27 static int showweights
; /* print weights too */
28 static int usekavosh
; /* use kavosh instead of fanmod */
29 static int doheur
= 1; /* enable heuristics */
30 static int matchthresh
= 6; /* the branching threshold for matching test */
31 static int verbose
; /* print the motifs as they are reported */
33 static int readgraph(void);
34 static void readdot(void);
35 static int maxcolors(int *nodes
);
36 static void printmotif(int *nodes
);
38 /* perform a matching to see if the subgraph is feasible */
39 static int feasible_match(int *nodes
)
44 return maxcolors(nodes
) == i
;
47 /* see if the vertices have at least the same number of colors */
48 static int feasible(int *nodes
, int rem
)
51 int nc
= 0; /* nodes */
53 for (i
= 0; nodes
[i
] >= 0; i
++) {
54 for (j
= 0; C
[nodes
[i
]][j
] >= 0; j
++) {
55 if (!c
[C
[nodes
[i
]][j
]]) {
56 c
[C
[nodes
[i
]][j
]] = 1;
63 return !nsub_finished(i
+ rem
);
66 /* find the maximum number of colors that can be mapped to nodes */
67 static int maxcolors(int *nodes
)
69 int *adj
[MNODES
] = {NULL
};
71 for (i
= 0; nodes
[i
] >= 0; i
++)
73 return matching(adj
, i
, NCOLORS
);
76 /* does adding a vertex create a larger motif? */
77 static int extend(int *nodes
)
79 int map
[MNODES
] = {0};
84 for (i
= 0; i
< n
; i
++)
87 for (i
= 0; i
< n
; i
++) {
88 for (j
= 0; N
[nodes
[i
]][j
] >= 0; j
++) {
94 matched
= maxcolors(nodes
);
103 /* found a candidate motif */
104 static int subgraph(int *nodes
)
107 while (nodes
[n
] >= 0)
109 if (maxcolors(nodes
) != n
)
114 if (doheur
&& extend(nodes
))
115 nsub_detected(n
+ 1);
119 /* the number of colors with with lower frequency than c */
120 static int colorrank(int c
)
126 for (i
= 0; i
< NCOLORS
; i
++)
127 if (F
[i
] && (F
[i
] < F
[c
] || (F
[i
] == F
[c
] && i
< c
)))
132 /* the total number of colors that occur in the graph */
133 static int ncolors(void)
137 for (i
= 0; i
< NCOLORS
; i
++)
143 /* is u a good choice for tree root? */
144 static int goodroot(int u
, int crank
)
147 /* ignore vertices with no edges */
150 /* ignore vertices with frequent colors */
151 for (i
= 0; C
[u
][i
] >= 0; i
++)
152 if (colorrank(C
[u
][i
]) <= crank
)
157 static int kavosh_enum(int v
, int *used
, int nsub
);
158 static int fanmod_enum(int v
, int *used
, int nsub
);
159 static int reachable(int v
, int *used
, int nsub
);
161 /* generate all subgraphs of size nsub with all possible root vertices */
162 static int gensubs(int nsub
)
164 int used
[NNODES
] = {0};
167 int crank
= ncolors() - nsub
; /* the maximum color rank of the root */
168 int (*x_enum
)(int, int *, int) = usekavosh
? kavosh_enum
: fanmod_enum
;
169 for (i
= 0; i
< NNODES
; i
++) {
170 if (!doheur
|| goodroot(S
[i
], crank
)) {
172 if (!ownroot(nsub
, i
))
173 if (!doheur
|| reachable(S
[i
], used
, nsub
))
174 found
+= x_enum(S
[i
], used
, nsub
);
180 static int degcmp(const void *i1
, const void *i2
)
182 return D
[*(int *) i1
] - D
[*(int *) i2
];
185 static void init(void)
188 /* remove color-less vertices */
189 for (i
= 0; i
< NNODES
; i
++)
193 for (i
= 0; i
< NNODES
; i
++)
194 for (j
= 0; N
[i
][j
] >= 0; j
++)
196 /* initialize S as the sorted list of vertices based on degrees */
197 for (i
= 0; i
< NNODES
; i
++)
200 qsort(S
, NNODES
, sizeof(S
[0]), degcmp
);
203 static void *thread_main(void *v
)
205 int nsub
= nsub_next(0);
208 } while ((nsub
= nsub_next(nsub
)) > 0);
212 static void printhelp(void)
214 printf("RANGI: a list-colored graph motif finding program\n\n");
215 printf("options:\n");
216 printf("\t-s size \tsize of the motif to search for\n");
217 printf("\t-n count \tthe number of threads to create\n");
218 printf("\t-w \tprint the weight of each motif too\n");
219 printf("\t-d secs \tmotif finding deadline\n");
220 printf("\t-1 \tterminate after finding the first motif\n");
221 printf("\t-k \tuse kavosh instead of fanmod for enumeration\n");
222 printf("\t-e \texhaustive search; disable heuristics\n");
223 printf("\t-m thresh\tminimum branches to enable matching test (%d)\n",
225 printf("\t-v \tprint the motifs as they are detected\n");
228 static void stopit(int i
)
230 fprintf(stderr
, "rangi: timeout\n");
234 static void printall(void);
236 int main(int argc
, char *argv
[])
238 char *nsub_spec
= NULL
;
243 int nsub_end
= NCOLORS
;
245 for (i
= 1; i
< argc
; i
++) {
246 switch (argv
[i
][0] == '-' ? argv
[i
][1] : 'h') {
248 nsub_spec
= argv
[i
][2] ? argv
[i
] + 2 : argv
[++i
];
251 nthreads
= atoi(argv
[i
][2] ? argv
[i
] + 2 : argv
[++i
]);
254 alarm(atoi(argv
[i
][2] ? argv
[i
] + 2 : argv
[++i
]));
269 matchthresh
= atoi(argv
[i
][2] ? argv
[i
] + 2 : argv
[++i
]);
279 signal(SIGALRM
, stopit
);
283 nsub_end
= ncolors();
285 int c
= nsub_spec
[strlen(nsub_spec
) - 1];
286 int n
= atoi(nsub_spec
);
291 if (c
!= '-' && c
!= '+')
292 nsub_beg
= nsub_end
= n
;
293 nsub_dir
= c
== '-' ? -1 : 1;
295 proc_init(nsub_beg
, nsub_end
, nsub_dir
, one
, doheur
);
296 /* generate subgraphs */
300 pthread_t threads
[NTHREADS
];
301 for (i
= 0; i
< nthreads
; i
++)
302 pthread_create(&threads
[i
], NULL
, thread_main
, NULL
);
303 for (i
= 0; i
< nthreads
; i
++)
304 pthread_join(threads
[i
], NULL
);
312 /* reading the input graph */
314 static char names
[NNODES
][NLEN
]; /* vertex names */
315 static int nnodes
= 1;
317 static int nodeid(char *name
)
320 for (i
= 0; i
< nnodes
; i
++)
321 if (!strcmp(name
, names
[i
]))
323 strcpy(names
[nnodes
], name
);
328 * Read a graph in this format:
329 * + the number of vertices
330 * + the name and the list of colors of each vertex, terminated with -1
331 * + edges of graphs and their weights
334 * 3 # number vertices
335 * v1 2 4 -1 # the name and the colors of the first vertex
336 * v2 1 -1 # the name and the colors of the second vertex
337 * v3 1 3 4 -1 # the name and the colors of the third vertex
338 * v1 v2 102 # first edge with weight 102 between v1 and v2
339 * v1 v3 223 # second edge with weight 223 between v1 and v3
341 static int readgraph(void)
343 int nn
[NNODES
] = {0};
345 char u_
[NLEN
], v_
[NLEN
];
347 if (scanf("%d", &n
) != 1)
349 for (i
= 1; i
<= n
; i
++) {
353 while (scanf("%d", &c
) == 1 && c
!= -1) {
355 fprintf(stderr
, "rangi: increase NCOLORS!\n");
363 while (scanf("%s %s %d", u_
, v_
, &w
) == 3) {
371 for (i
= 0; i
< NNODES
; i
++)
376 /* reads a graph in graphviz format */
377 static void readdot(void)
379 int nn
[NNODES
] = {0};
385 if (scanf("%d", &u
) != 1)
387 if (getchar() == '-') {
388 if (scanf("-%d[weight=\"%d\"];", &v
, &w
) != 2)
396 if (scanf("colorlist=\"%lu\"];", &c
) != 1)
398 for (i
= 0; i
< sizeof(long) * 8; i
++)
399 if (c
& (1ul << i
)) {
406 for (i
= 0; i
< NNODES
; i
++)
411 /* weighing and printing motifs */
413 /* the minimum weight of the edges */
414 static int weight_min(int *nodes
, int n
)
418 for (i
= 0; i
< n
; i
++) {
419 for (j
= i
+ 1; j
< n
; j
++) {
420 int e
= V
[nodes
[i
]][nodes
[j
]];
428 /* the sum of the weight of the edges */
429 static int weight_sum(int *nodes
, int n
)
433 for (i
= 0; i
< n
; i
++)
434 for (j
= i
+ 1; j
< n
; j
++)
435 w
+= V
[nodes
[i
]][nodes
[j
]];
439 /* the number of edges */
440 static int weight_num(int *nodes
, int n
)
444 for (i
= 0; i
< n
; i
++)
445 for (j
= i
+ 1; j
< n
; j
++)
446 if (V
[nodes
[i
]][nodes
[j
]])
451 /* the weight of maximum spanning tree */
452 static int weight_mst(int *nodes
, int n
)
454 int nei
[NCOLORS
][NCOLORS
] = {{0}}; /* adjacency list */
455 int nnei
[NCOLORS
] = {0}; /* number of neighbors */
456 int wei
[NCOLORS
][NCOLORS
] = {{0}}; /* weight of edges */
457 int sel
[NCOLORS
][NCOLORS
] = {{0}}; /* selected edges in mst */
458 int mark
[NCOLORS
]; /* component of vertices */
460 int oldmark
, newmark
;
462 /* create the induced subgraph of nodes */
463 for (i
= 0; i
< n
; i
++) {
464 for (j
= 0; j
< n
; j
++) {
465 wei
[i
][j
] = V
[nodes
[i
]][nodes
[j
]];
467 nei
[i
][nnei
[i
]++] = j
;
470 /* each node has its own component at the beginning */
471 for (i
= 0; i
< n
; i
++)
473 /* add n - 1 edges */
474 for (i
= 0; i
< n
- 1; i
++) {
475 int maxu
= 0, maxv
= 1;
477 /* finding the maximum edge */
478 for (u
= 0; u
< n
; u
++) {
479 for (j
= 0; j
< nnei
[u
]; j
++) {
481 if (maxedge
< wei
[u
][v
] && !sel
[u
][v
] &&
482 mark
[u
] != mark
[v
]) {
490 /* merging the components */
491 oldmark
= mark
[maxu
];
492 newmark
= mark
[maxv
];
493 for (u
= 0; u
< n
; u
++)
494 if (mark
[u
] == oldmark
)
497 /* calculating the weight of the spanning tree */
498 for (u
= 0; u
< n
; u
++) {
499 for (j
= 0; j
< nnei
[u
]; j
++) {
501 if (u
< v
&& sel
[u
][v
])
508 /* print a motif and its weight */
509 static void printmotif(int *nodes
)
513 while (nodes
[n
] >= 0)
516 for (j
= 0; j
< n
; j
++)
517 printf("%s ", names
[nodes
[j
]]);
519 printf("\t%d %d %d %d",
520 weight_mst(nodes
, n
),
521 weight_sum(nodes
, n
),
522 weight_min(nodes
, n
),
523 weight_num(nodes
, n
));
528 static void printall(void)
532 n
= motifs(sols
, NSOLS
);
533 for (i
= 0; i
< n
; i
++)
538 /* the kavosh enumeration algorithm */
540 /* generate all C(k, n) ways of selecting k integers from {0, ..., n - 1} */
541 static int kavosh_comb(int k
, int n
, int *idx
)
544 /* find the first element that could be moved right */
545 for (i
= k
- 1; i
>= 0; i
--)
546 if (idx
[i
] < n
- (k
- i
- 1) - 1)
551 for (j
= i
+ 1; j
< k
; j
++)
552 idx
[j
] = idx
[i
] + (j
- i
);
556 /* extend the given subgraph starting at the given depth */
557 static int kavosh_fill(int *nodes
, int *prev
, int *used
, int rem
)
559 int cur
[NNODES
]; /* vertices in this depth */
560 int idx
[NNODES
]; /* index of selected vertices in cur[] */
561 int *sel
; /* inserted vertices into nodes */
565 if (doheur
&& !feasible(nodes
, rem
))
568 return subgraph(nodes
);
570 * put the vertices with the given depth into cur[] and mark
573 for (i
= 0; prev
[i
] >= 0; i
++) {
575 for (j
= 0; N
[u
][j
] >= 0; j
++) {
577 if (!used
[v
] && N
[v
][0] >= 0) {
584 /* where to insert new nodes */
588 if (doheur
&& ncur
* (ncur
- 1) / 2 >= matchthresh
&& !feasible_match(nodes
))
590 /* try all subgraphs with k vertices at this depth */
591 for (k
= 1; k
<= MIN(ncur
, rem
); k
++) {
592 for (i
= 0; i
< k
; i
++)
595 /* enumerate all sel[] of size k from cur[] of size ncur */
597 for (i
= 0; i
< k
; i
++)
598 sel
[i
] = cur
[idx
[i
]];
599 ret
+= kavosh_fill(nodes
, sel
, used
, rem
- k
);
600 } while (!kavosh_comb(k
, ncur
, idx
));
603 for (i
= 0; cur
[i
] >= 0; i
++)
609 /* an optimized version of the Kavosh algorithm */
610 static int kavosh_enum(int v
, int *used
, int nsub
)
612 int nodes
[NNODES
] = {v
, -1};
614 return kavosh_fill(nodes
, nodes
, used
, nsub
- 1);
618 /* the fanmod enumeration algorithm */
620 /* insert the neighbors of w into the extension list */
621 static int fanmod_addext(int *sub_map
, int *ext_map
, int *ext
, int *used
, int w
)
625 for (i
= 0; N
[w
][i
] >= 0; i
++) {
627 if (used
[u
] || sub_map
[u
] || ext_map
[u
])
636 /* remove the vertices added via fanmod_addext */
637 static void fanmod_delext(int *ext_map
, int *ext
)
640 for (i
= 0; ext
[i
] >= 0; i
++)
645 /* recursively add vertices to sub; see the comments in fanmod_enum() */
646 static int fanmod_fill(int *used
, int *sub
, int *sub_map
,
647 int *ext
, int *ext_map
, int rem
)
649 int n_sub
= 0; /* number of entries in sub */
650 int n_ext
= 0; /* number of entries in ext */
653 if (doheur
&& !feasible(sub
, rem
))
656 return subgraph(sub
);
657 while (sub
[n_sub
] >= 0)
659 while (ext
[n_ext
] >= 0)
661 if (doheur
&& n_ext
>= matchthresh
&& !feasible_match(sub
))
663 for (i
= 0; i
< n_ext
; i
++) {
667 fanmod_addext(sub_map
, ext_map
, ext
+ n_ext
, used
, ext
[i
]);
668 found
+= fanmod_fill(used
, sub
, sub_map
, ext
+ i
+ 1, ext_map
, rem
- 1);
669 fanmod_delext(ext_map
, ext
+ n_ext
);
676 /* an optimized version of the ESU algorithm */
677 static int fanmod_enum(int v
, int *used
, int nsub
)
679 int sub
[NNODES
] = {v
, -1}; /* the -1 terminated subgraph vertices */
680 int sub_map
[NNODES
] = {0}; /* sub_map[i] is one if i is in sub */
681 int ext
[NNODES
] = {-1}; /* the -1 terminated extension list */
682 int ext_map
[NNODES
] = {0}; /* ext_map[i] is one if i is in ext */
684 fanmod_addext(sub_map
, ext_map
, ext
, used
, v
);
685 return fanmod_fill(used
, sub
, sub_map
, ext
, ext_map
, nsub
- 1);
689 /* count the number of reachable colors from a vertex */
690 static int reachable(int v
, int *used
, int nsub
)
692 int mark
[NNODES
] = {0};
693 int clrs
[NCOLORS
] = {0};
701 /* the BFS algorithm */
702 while (qhead
< qtail
) {
703 if (nclrs
>= nsub
&& qtail
>= nsub
)
706 /* counting the colors in v */
707 for (i
= 0; C
[v
][i
] >= 0; i
++) {
708 if (!clrs
[C
[v
][i
]]) {
715 /* adding neighbors of v to queue */
716 for (i
= 0; N
[v
][i
] >= 0; i
++) {
718 if (!mark
[u
] && !used
[u
]) {
720 mark
[u
] = mark
[v
] + 1;
724 /* the neighborhood has nsub colors and the component nsub vertices */
725 return nclrs
>= nsub
&& qtail
>= nsub
;