rangi: improve some of the comments
[rangi.git] / fastafilt.c
blob475f70506a651c61105c84649f8421cb3e643424
1 /*
2 * fastafilt, filter a fasta file, keeping only the proteins named in the given complexes
4 * Copyright (C) 2012 Ali Gholami Rudi <ali at rudi dot ir>
6 * This program is released under the modified BSD license.
7 */
8 #include <stdio.h>
9 #include <string.h>
11 #define NPROTS (1 << 12)
12 #define NLEN (128)
14 static char prots[NPROTS][NLEN];
15 static int nprots;
17 static int prot_find(char *name)
19 int i;
20 for (i = 0; i < nprots; i++)
21 if (!strcmp(name, prots[i]))
22 return i;
23 return -1;
26 static void prot_add(char *name)
28 if (prot_find(name) >= 0)
29 return;
30 strcpy(prots[nprots++], name);
33 static void prot_read(FILE *fin)
35 char name[NLEN];
36 while (fscanf(fin, "%s", name) == 1)
37 prot_add(name);
40 static void fasta_filt(FILE *fin)
42 char line[256];
43 char name[NLEN];
44 int matched = 0;
45 while (fgets(line, sizeof(line), fin)) {
46 if (line[0] != '>') {
47 if (matched)
48 printf("%s", line);
49 continue;
51 sscanf(line + 1, "%s", name);
52 matched = prot_find(name) >= 0;
53 if (matched)
54 printf("%s", line);
58 int main(int argc, char *argv[])
60 FILE *fin = argc > 1 ? fopen(argv[1], "r") : NULL;
61 if (!fin) {
62 printf("usage: %s in.fasta <complexes >out.fasta\n", argv[0]);
63 return 0;
65 prot_read(stdin);
66 fasta_filt(fin);
67 fclose(fin);
68 return 0;