Bug #1267: Integrate METIS graph partitioning library
[charm.git] / src / libs / ck-libs / metis / GKlib / itemsets.c
blob65b5af40d0f1df524776cc3d1523522096168d1a
1 /*!
2 * \file
3 * \brief Frequent/Closed itemset discovery routines
5 * This file contains the code for finding frequent/closed itemests. These routines
6 * are implemented using a call-back mechanism to deal with the discovered itemsets.
8 * \date 6/13/2008
9 * \author George Karypis
10 * \version\verbatim $Id: itemsets.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
13 #include <GKlib.h>
15 /*-------------------------------------------------------------*/
16 /*! Data structures for use within this module */
17 /*-------------------------------------------------------------*/
18 typedef struct {
19 int minfreq; /* the minimum frequency of a pattern */
20 int maxfreq; /* the maximum frequency of a pattern */
21 int minlen; /* the minimum length of the requested pattern */
22 int maxlen; /* the maximum length of the requested pattern */
23 int tnitems; /* the initial range of the item space */
25 /* the call-back function */
26 void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids);
27 void *stateptr; /* the user-supplied pointer to pass to the callback */
29 /* workspace variables */
30 int *rmarker;
31 gk_ikv_t *cand;
32 } isparams_t;
35 /*-------------------------------------------------------------*/
36 /*! Prototypes for this module */
37 /*-------------------------------------------------------------*/
38 void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
39 int preflen, int *prefix);
40 gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
44 /*************************************************************************/
45 /*! The entry point of the frequent itemset discovery code */
46 /*************************************************************************/
47 void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
48 int minfreq, int maxfreq, int minlen, int maxlen,
49 void (*process_itemset)(void *stateptr, int nitems, int *itemids,
50 int ntrans, int *transids),
51 void *stateptr)
53 ssize_t i;
54 gk_csr_t *mat, *pmat;
55 isparams_t params;
56 int *pattern;
58 /* Create the matrix */
59 mat = gk_csr_Create();
60 mat->nrows = ntrans;
61 mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1;
62 mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
63 mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
64 mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
66 /* Setup the parameters */
67 params.minfreq = minfreq;
68 params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq);
69 params.minlen = minlen;
70 params.maxlen = (maxlen == -1 ? mat->ncols : maxlen);
71 params.tnitems = mat->ncols;
72 params.callback = process_itemset;
73 params.stateptr = stateptr;
74 params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
75 params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
77 /* Perform the initial projection */
78 gk_csr_CreateIndex(mat, GK_CSR_COL);
79 pmat = itemsets_project_matrix(&params, mat, -1);
80 gk_csr_Free(&mat);
82 pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
83 itemsets_find_frequent_itemsets(&params, pmat, 0, pattern);
85 gk_csr_Free(&pmat);
86 gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
92 /*************************************************************************/
93 /*! The recursive routine for DFS-based frequent pattern discovery */
94 /*************************************************************************/
95 void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
96 int preflen, int *prefix)
98 ssize_t i;
99 gk_csr_t *cmat;
101 /* Project each frequent column */
102 for (i=0; i<mat->ncols; i++) {
103 prefix[preflen] = mat->colids[i];
105 if (preflen+1 >= params->minlen)
106 (*params->callback)(params->stateptr, preflen+1, prefix,
107 mat->colptr[i+1]-mat->colptr[i], mat->colind+mat->colptr[i]);
109 if (preflen+1 < params->maxlen) {
110 cmat = itemsets_project_matrix(params, mat, i);
111 itemsets_find_frequent_itemsets(params, cmat, preflen+1, prefix);
112 gk_csr_Free(&cmat);
119 /******************************************************************************/
120 /*! This function projects a matrix w.r.t. to a particular column.
121 It performs the following steps:
122 - Determines the length of each column that is remaining
123 - Sorts the columns in increasing length
124 - Creates a column-based version of the matrix with the proper
125 column ordering and renamed rowids.
127 /*******************************************************************************/
128 gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
130 ssize_t i, j, k, ii, pnnz;
131 int nrows, ncols, pnrows, pncols;
132 ssize_t *colptr, *pcolptr;
133 int *colind, *colids, *pcolind, *pcolids, *rmarker;
134 gk_csr_t *pmat;
135 gk_ikv_t *cand;
137 nrows = mat->nrows;
138 ncols = mat->ncols;
139 colptr = mat->colptr;
140 colind = mat->colind;
141 colids = mat->colids;
143 rmarker = params->rmarker;
144 cand = params->cand;
147 /* Allocate space for the projected matrix based on what you know thus far */
148 pmat = gk_csr_Create();
149 pmat->nrows = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
152 /* Mark the rows that will be kept and determine the prowids */
153 if (cid == -1) { /* Initial projection */
154 gk_iset(nrows, 1, rmarker);
156 else { /* The other projections */
157 for (i=colptr[cid]; i<colptr[cid+1]; i++)
158 rmarker[colind[i]] = 1;
162 /* Determine the length of each column that will be left in the projected matrix */
163 for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
164 for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
165 k += rmarker[colind[j]];
167 if (k >= params->minfreq && k <= params->maxfreq) {
168 cand[pncols].val = i;
169 cand[pncols++].key = k;
170 pnnz += k;
174 /* Sort the columns in increasing order */
175 gk_ikvsorti(pncols, cand);
178 /* Allocate space for the remaining fields of the projected matrix */
179 pmat->ncols = pncols;
180 pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
181 pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
182 pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
185 /* Populate the projected matrix */
186 pcolptr[0] = 0;
187 for (pnnz=0, ii=0; ii<pncols; ii++) {
188 i = cand[ii].val;
189 for (j=colptr[i]; j<colptr[i+1]; j++) {
190 if (rmarker[colind[j]])
191 pcolind[pnnz++] = colind[j];
194 pcolids[ii] = colids[i];
195 pcolptr[ii+1] = pnnz;
199 /* Reset the rmarker array */
200 if (cid == -1) { /* Initial projection */
201 gk_iset(nrows, 0, rmarker);
203 else { /* The other projections */
204 for (i=colptr[cid]; i<colptr[cid+1]; i++)
205 rmarker[colind[i]] = 0;
209 return pmat;