3 // Copyright (C) 2008 Elena Grassi <grassi.e@gmail.com>
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any later
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY
12 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 #include "total_affinity.h"
25 Drives the execution following command line arguments.
29 argc - int (default): the number of given arguments.
30 argv - char** (default): array of arguments strings.
32 int main(int argc
, char *argv
[])
36 if (set_beginning_info(&info
))
39 parse_command_line(argv
, argc
, info
);
42 printf("#Starting with window %d, revcomp %d, count %d, cutoff %lf, "
43 "abs_cutoff %lf, given_abs_cutoff %d, log2 %d, slide %d, only_max %d "
45 info
->window
, info
->revcomp
, info
->counts
, info
->cutoff
,
46 info
->abs_cutoff
, info
->given_abs_cutoff
, info
->log2
,
47 info
->slide
, info
->only_max
, info
->error
);
52 if (info
->window
== 0 && !info
->miRNA
) /* if in syteseeker mode we always use loglikelihoods. */
58 if (info
->f_cutoffs
) {
63 printf("#Loaded %d matrixes\n", info
->n_matrixes
);
71 printf("#Loaded %d fasta\n", info
->n_fasta
);
78 for (i
= 0; i
< info
->n_fasta
; i
++) {
79 printf(">%s\n", (info
->fas
)[i
]->id
);
81 if (!i
|| !info
->f_background
) {
82 for (j
= 0; j
< info
->n_matrixes
; j
++) {
83 /* !i because at least we have to get ll foreach matrix at the beginning.
84 !info->f_background because if we have different bg for every
85 fasta we have to do this everytime and not foreach matrix only. */
86 assign_ll((info
->matrixes
)[j
], (info
->fas
)[i
],
92 if (info
->window
!= 0) {
93 if (info
->window
!= -1) {
97 strong_assert(W
% J
== 0);
99 int N_mat
= info
->n_matrixes
;
100 char *seq
= (info
->fas
)[i
]->seq
;//check the pointer
101 int seq_len
= (info
->fas
)[i
]->length
;
102 strong_assert(seq_len
>= W
);
106 for(j
=0; j
< N_mat
; j
++){
107 matrix_ll m
= (info
->matrixes
)[j
];
108 for(p
=0; p
<= W
-2*J
; p
+=J
){
110 int corner
= p
+ J
+ m
->length
-1 < seq_len
? p
+ J
+ m
->length
-1 : seq_len
-1;
112 (m
->cache
)[p
/J
+ 1] = matrix_little_window_tot(m
, info
->fas
[i
], p
, p
+ J
, &tot_match_noN
, info
);
113 (m
->normaliz_cache
)[p
/J
+ 1] = tot_match_noN
;
114 (m
->cache
)[p
/J
+ 1] += matrix_little_window_tot(m
, info
->fas
[i
], p
+ J
- m
->length
+1, corner
, &tot_match_noN
, info
);
115 (m
->normaliz_cache
)[p
/J
+ 1] += tot_match_noN
;
116 // the first index was not initialized, before using cache we scroll them
121 while (p
<= seq_len
- W
) {
122 for (j
= 0; j
< N_mat
; j
++) {
123 matrix_ll m
= info
->matrixes
[j
];
127 consier unsing fifo instead of array
128 or even better an object with a fifo and a running sum
131 for(c
=0; c
<Csize
-1; c
++){
132 ((m
)->cache
)[c
]=((m
)->cache
)[c
+1];
133 ((m
)->normaliz_cache
)[c
]=((m
)->normaliz_cache
)[c
+1];
135 /************************/
137 int corner
= p
+ W
+ m
->length
-1 < seq_len
? p
+ W
+ m
->length
-1 : seq_len
-1;
138 double portion_aff_corner_case
= matrix_little_window_tot(m
, info
->fas
[i
], p
+ W
- m
->length
+1, corner
, &tot_match_noN
, info
);
139 ((m
)->normaliz_cache
)[Csize
-1] = tot_match_noN
;
140 double portion_aff
= matrix_little_window_tot(m
, info
->fas
[i
], p
+ W
- J
, p
+ W
, &tot_match_noN
, info
);
141 ((m
)->cache
)[Csize
-1] = portion_aff
+ portion_aff_corner_case
;
142 ((m
)->normaliz_cache
)[Csize
-1] += tot_match_noN
;
143 //the preceding protions are in the cache
146 for(c
=0; c
< Csize
-1; c
++){
147 tot_aff
+=((m
)->cache
)[c
];
149 tot_aff
+= portion_aff
;
151 int tot_match_noN_sum
= 0;
152 for(c
=0; c
< Csize
-1; c
++){
153 tot_match_noN_sum
+= ((m
)->normaliz_cache
)[c
];
155 tot_match_noN_sum
+= tot_match_noN
; // the last tot_match_noN computed are those of te portions without corner cases
157 if(tot_match_noN_sum
== 0 && info
->skip_N_regions
){
160 if(info
->normalize_on_seq_len
){
161 tot_aff
/=tot_match_noN_sum
;
163 printf("%d\t%s\t%g\n",
164 p
, (m
)->name
, tot_aff
);
171 (info->matrixes)[j]->name);
172 d = matrix_run((info->matrixes)[j],
177 print_window_affinities(d, info->window - info->matrixes[j]->length + 1, how_many, info->slide);
182 // info->window == -1 (single window mode)
183 for (j
= 0; j
< info
->n_matrixes
; j
++) {
184 if ((!i
|| !info
->f_background
) && info
->miRNA
) { //we need a cutoff for miRNA mode.
185 assign_cutoff((info
->matrixes
)[j
], info
);
188 // (info->matrixes)[j]->name);
190 // matrix_little_window_tot((info->matrixes)[j], (info->fas)[i]->seq, -1, -1, info));
191 int tot_match_noN
= 0;
192 double tot_aff
= matrix_little_window_tot((info
->matrixes
)[j
], (info
->fas
)[i
], 0, -1, &tot_match_noN
, info
);
193 if(info
->normalize_on_seq_len
){
194 tot_aff
/=tot_match_noN
;
196 printf("%s\t%g\n", (info
->matrixes
)[j
]->name
, tot_aff
);
201 for (j
= 0; j
< info
->n_matrixes
; j
++) {
202 if (!i
|| !info
->f_background
) {
203 assign_cutoff((info
->matrixes
)[j
],
207 find_single_strand_matches((info
->matrixes
)[j
],
208 (info
->fas
[i
]), info
);
210 matrix_cutoff_print((info
->matrixes
)[j
],
211 (info
->fas
[i
]), info
);
217 printf("#Loaded %d matrixes\n", info
->n_matrixes
);
218 print_matrixes(info
);