Bug #1267: Integrate METIS graph partitioning library
[charm.git] / src / libs / ck-libs / metis / GKlib / seq.c
blobf267a3ea027853ab66c54c40e8772833cb03ed8d
1 /*
3 * Sequence handler library by Huzefa Rangwala
4 * Date : 03.01.2007
8 */
11 #include <GKlib.h>
16 /*********************************************************/
17 /* ! \brief Initializes the <tt>gk_seq_t</tt> variable
22 \param A pointer to gk_seq_t itself
23 \returns null
25 /***********************************************************************/
27 void gk_seq_init(gk_seq_t *seq)
30 seq->len = 0;
31 seq->sequence = NULL;
33 seq->pssm = NULL;
34 seq->psfm = NULL;
36 seq->name = NULL;
40 /***********************************************************************/
41 /*! \brief This function creates the localizations for the various sequences
43 \param string i.e amino acids, nucleotides, sequences
44 \returns gk_i2cc2i_t variable
46 /*********************************************************************/
48 gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
52 int nsymbols;
53 gk_idx_t i;
54 gk_i2cc2i_t *t;
56 nsymbols = strlen(alphabet);
57 t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
58 t->n = nsymbols;
59 t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
60 t->c2i = gk_imalloc(256, "gk_i2c_create_common");
63 gk_cset(256, -1, t->i2c);
64 gk_iset(256, -1, t->c2i);
66 for(i=0;i<nsymbols;i++){
67 t->i2c[i] = alphabet[i];
68 t->c2i[(int)alphabet[i]] = i;
71 return t;
76 /*********************************************************************/
77 /*! \brief This function reads a pssm in the format of gkmod pssm
79 \param file_name is the name of the pssm file
80 \returns gk_seq_t
82 /********************************************************************/
83 gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
85 gk_seq_t *seq;
86 gk_idx_t i, j, ii;
87 size_t ntokens, nbytes, len;
88 FILE *fpin;
91 gk_Tokens_t tokens;
92 static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
93 static int PSSMWIDTH = 20;
94 char *header, line[MAXLINELEN];
95 gk_i2cc2i_t *converter;
97 header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
99 converter = gk_i2cc2i_create_common(AAORDER);
101 gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
102 len --;
104 seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
105 gk_seq_init(seq);
107 seq->len = len;
108 seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
109 seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
110 seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
112 seq->nsymbols = PSSMWIDTH;
113 seq->name = gk_getbasename(filename);
115 fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
118 /* Read the header line */
119 if (fgets(line, MAXLINELEN-1, fpin) == NULL)
120 errexit("Unexpected end of file: %s\n", filename);
121 gk_strtoupper(line);
122 gk_strtokenize(line, " \t\n", &tokens);
124 for (i=0; i<PSSMWIDTH; i++)
125 header[i] = tokens.list[i][0];
127 gk_freetokenslist(&tokens);
130 /* Read the rest of the lines */
131 for (i=0, ii=0; ii<len; ii++) {
132 if (fgets(line, MAXLINELEN-1, fpin) == NULL)
133 errexit("Unexpected end of file: %s\n", filename);
134 gk_strtoupper(line);
135 gk_strtokenize(line, " \t\n", &tokens);
137 seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
139 for (j=0; j<PSSMWIDTH; j++) {
140 seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
141 seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
146 gk_freetokenslist(&tokens);
147 i++;
150 seq->len = i; /* Reset the length if certain characters were skipped */
152 gk_free((void **)&header, LTERM);
153 gk_fclose(fpin);
155 return seq;
159 /**************************************************************************/
160 /*! \brief This function frees the memory allocated to the seq structure.
162 \param gk_seq_t
163 \returns nothing
165 /**************************************************************************/
166 void gk_seq_free(gk_seq_t *seq)
168 gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
169 gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
170 gk_free((void **)&seq->name, &seq->sequence, LTERM);
171 //gk_free((void **)&seq, LTERM);
172 gk_free((void **) &seq, LTERM);