-S option implemented: works only on the given strand (still not implemented for...
[matrix_rider.git] / main.c
blob916d5b95fdf6fc9a4b26657a4a891799c1ff07c2
1 /* main.c
2 //
3 // Copyright (C) 2008 Elena Grassi <grassi.e@gmail.com>
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any later
8 // version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY
12 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // for more details.
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <assert.h>
20 #include "total_affinity.h"
23 Function: main
25 Drives the execution following command line arguments.
27 Parameters:
29 argc - int (default): the number of given arguments.
30 argv - char** (default): array of arguments strings.
32 int main(int argc, char *argv[])
35 run info = NULL;
36 if (set_beginning_info(&info))
37 return ERROR;
39 parse_command_line(argv, argc, info);
41 #ifdef DEBUG
42 printf("#Starting with window %d, revcomp %d, count %d, cutoff %lf, "
43 "abs_cutoff %lf, given_abs_cutoff %d, log2 %d, slide %d, only_max %d "
44 "error_status %d\n",
45 info->window, info->revcomp, info->counts, info->cutoff,
46 info->abs_cutoff, info->given_abs_cutoff, info->log2,
47 info->slide, info->only_max, info->error);
48 #endif /* DEBUG */
50 check_error(info);
52 if (info->window == 0 && !info->miRNA) /* if in syteseeker mode we always use loglikelihoods. */
53 info->log2 = 1;
55 get_matrixes(info);
56 check_error(info);
58 if (info->f_cutoffs) {
59 get_cutoffs(info);
60 check_error(info);
62 #ifdef DEBUG
63 printf("#Loaded %d matrixes\n", info->n_matrixes);
64 print_matrixes(info);
65 #endif /* DEBUG */
67 load_fasta(info);
68 check_error(info);
70 #ifdef DEBUG
71 printf("#Loaded %d fasta\n", info->n_fasta);
72 print_fasta(info);
73 #endif /* DEBUG */
75 int i, j = 0;
76 double *d = NULL;
77 int how_many = 0;
78 for (i = 0; i < info->n_fasta; i++) {
79 printf(">%s\n", (info->fas)[i]->id);
81 if (!i || !info->f_background) {
82 for (j = 0; j < info->n_matrixes; j++) {
83 /* !i because at least we have to get ll foreach matrix at the beginning.
84 !info->f_background because if we have different bg for every
85 fasta we have to do this everytime and not foreach matrix only. */
86 assign_ll((info->matrixes)[j], (info->fas)[i],
87 info);
88 check_error(info);
92 if (info->window != 0) {
93 if (info->window != -1) {
94 //sliding window mode
95 int W = info->window;
96 int J = info->slide;
97 strong_assert(W % J == 0);
98 int Csize = W/J;
99 int N_mat = info->n_matrixes;
100 char *seq = (info->fas)[i]->seq;//check the pointer
101 int seq_len = (info->fas)[i]->length;
102 strong_assert(seq_len >= W);
104 int p = 0;
105 int j = 0;
106 for(j=0; j < N_mat; j++){
107 matrix_ll m = (info->matrixes)[j];
108 for(p=0; p <= W-2*J; p+=J){
109 int tot_match_noN=0;
110 int corner = p + J + m->length -1 < seq_len ? p + J + m->length -1 : seq_len -1;
112 (m->cache)[p/J + 1] = matrix_little_window_tot(m, info->fas[i], p , p + J , &tot_match_noN, info);
113 (m->normaliz_cache)[p/J + 1] = tot_match_noN;
114 (m->cache)[p/J + 1] += matrix_little_window_tot(m, info->fas[i], p + J - m->length +1, corner, &tot_match_noN, info);
115 (m->normaliz_cache)[p/J + 1] += tot_match_noN;
116 // the first index was not initialized, before using cache we scroll them
120 p=0;
121 while (p <= seq_len - W) {
122 for (j = 0; j < N_mat; j++) {
123 matrix_ll m = info->matrixes[j];
124 int tot_match_noN=0;
126 /* cache scroll
127 consier unsing fifo instead of array
128 or even better an object with a fifo and a running sum
130 int c = 0;
131 for(c=0; c<Csize-1; c++){
132 ((m)->cache)[c]=((m)->cache)[c+1];
133 ((m)->normaliz_cache)[c]=((m)->normaliz_cache)[c+1];
135 /************************/
137 int corner = p + W + m->length -1 < seq_len ? p + W + m->length -1 : seq_len -1;
138 double portion_aff_corner_case = matrix_little_window_tot(m, info->fas[i], p + W - m->length +1, corner, &tot_match_noN, info);
139 ((m)->normaliz_cache)[Csize-1] = tot_match_noN;
140 double portion_aff = matrix_little_window_tot(m, info->fas[i], p + W - J, p + W, &tot_match_noN, info);
141 ((m)->cache)[Csize-1] = portion_aff + portion_aff_corner_case;
142 ((m)->normaliz_cache)[Csize-1] += tot_match_noN;
143 //the preceding protions are in the cache
145 double tot_aff = 0;
146 for(c=0; c < Csize-1; c++){
147 tot_aff+=((m)->cache)[c];
149 tot_aff += portion_aff;
151 int tot_match_noN_sum = 0;
152 for(c=0; c < Csize-1; c++){
153 tot_match_noN_sum += ((m)->normaliz_cache)[c];
155 tot_match_noN_sum += tot_match_noN; // the last tot_match_noN computed are those of te portions without corner cases
157 if(tot_match_noN_sum == 0 && info->skip_N_regions){
158 break;
160 if(info->normalize_on_seq_len){
161 tot_aff/=tot_match_noN_sum;
163 printf("%d\t%s\t%g\n",
164 p, (m)->name, tot_aff);
166 p+=J;
169 /* old way
170 printf("%s\n",
171 (info->matrixes)[j]->name);
172 d = matrix_run((info->matrixes)[j],
173 (info->fas)[i],
174 &how_many, info);
175 check_error(info);
176 if (d)
177 print_window_affinities(d, info->window - info->matrixes[j]->length + 1, how_many, info->slide);
178 free(d);
181 } else {
182 // info->window == -1 (single window mode)
183 for (j = 0; j < info->n_matrixes; j++) {
184 if ((!i || !info->f_background) && info->miRNA) { //we need a cutoff for miRNA mode.
185 assign_cutoff((info->matrixes)[j], info);
187 //printf("%s\t",
188 // (info->matrixes)[j]->name);
189 //printf("%g\n",
190 // matrix_little_window_tot((info->matrixes)[j], (info->fas)[i]->seq, -1, -1, info));
191 int tot_match_noN = 0;
192 double tot_aff = matrix_little_window_tot((info->matrixes)[j], (info->fas)[i], 0, -1, &tot_match_noN, info);
193 if(info->normalize_on_seq_len){
194 tot_aff/=tot_match_noN;
196 printf("%s\t%g\n", (info->matrixes)[j]->name, tot_aff);
199 } else {
200 //single site mode
201 for (j = 0; j < info->n_matrixes; j++) {
202 if (!i || !info->f_background) {
203 assign_cutoff((info->matrixes)[j],
204 info);
206 if (info->miRNA)
207 find_single_strand_matches((info->matrixes)[j],
208 (info->fas[i]), info);
209 else
210 matrix_cutoff_print((info->matrixes)[j],
211 (info->fas[i]), info);
212 check_error(info);
216 #ifdef DEBUG
217 printf("#Loaded %d matrixes\n", info->n_matrixes);
218 print_matrixes(info);
219 #endif /* DEBUG */
221 tidy(info);
222 return OK;