-S option implemented: works only on the given strand (still not implemented for...
[matrix_rider.git] / matrixes.c
blob27310fb1a29ad44997c1bee61466aff3dab16c0e
1 /* matrixes.c
2 //
3 // Copyright (C) 2008 Elena Grassi <grassi.e@gmail.com>
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any later
8 // version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY
12 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // for more details.
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <math.h>
20 #include <assert.h>
21 #include "total_affinity.h"
22 #include "CuTest.h"
24 //XXX fix Paolo's format
26 Function: get_matrixes
27 Loads matrixes from the given file, putting their infos in info.
29 Parameters:
30 info - pointer to run struct with a properly opened FILE* with matrixes
31 (Paolo's format), info->count tells if we already have percentages
32 [0] or pseudocounts [1] or counts [2] of single nucleotides.
33 Error flags will be put there.
36 void get_matrixes(run info)
38 int cache_size = 0;
39 if(info->window !=0 && info->window != -1){
40 assert(info->window % info->slide == 0);
41 cache_size = info->window / info->slide;
43 if (alloc_matrixes(&(info->matrixes),cache_size))
44 info->error = MEMORY_ERROR;
45 matrix_ll *matrixes = info->matrixes;
46 char name[MAX_MATRIX_NAME];
47 char old_name[MAX_MATRIX_NAME];
48 double acgt[BASES];
49 strcpy(old_name, "");
50 strcpy(name, "");
51 int bases_c = 0;
52 int length = 0;
53 int ongoing = 1;
54 int i = 0;
55 while (ongoing) {
56 if (fscanf(info->f_matrixes, "%s\t%*d\t%lf\t%lf\t%lf\t%lf\n",
57 name, &acgt[A], &acgt[C], &acgt[G], &acgt[T]) != 5) {
58 #ifdef DEBUG
59 printf("You fool fscanf!\n");
60 #endif /* DEBUG */
61 if (fgetc(info->f_matrixes) != EOF)
62 info->error = MATRIX_FILE_ERROR;
63 ongoing = 0;
64 break; /* XXX BAD */
66 if (strcmp(name, old_name)) { /* we have a new matrix */
67 #ifdef DEBUG
68 printf("Last was %d, now adding %s\n", length, name);
69 #endif /* DEBUG */
70 if (info->n_matrixes != 0) {
71 (*matrixes)->length = length; /* assign length to last matrix */
72 get_fractions_from_pcounts(*matrixes, info);
73 check_error(info);
74 matrixes++; /* next matrix to be loaded */
76 info->n_matrixes++;
77 bases_c = 0;
78 length = 0;
79 strcpy((*matrixes)->name, name);
81 for (i = 0; i < BASES; i++)
82 (*matrixes)->freq[length][i] = acgt[i];
83 length++;
84 if (info->n_matrixes >= MAX_MATRIXES
85 || length >= MAX_MATRIX_LENGTH) {
86 ongoing = 0;
87 info->error = MATRIX_LIMIT_ERROR;
89 strcpy(old_name, name);
91 (*matrixes)->length = length; /* assign length to last matrix */
92 get_fractions_from_pcounts(*matrixes, info);
95 //XXX
96 void get_cutoffs(run info)
98 double abs_c = 0.0;
99 double frac_c = 0.0;
100 int i, c = 0;
101 char name[MAX_MATRIX_NAME];
102 int found[info->n_matrixes];
103 for (; i < info->n_matrixes; i++)
104 found[i] = 0;
105 /* matrix name \t absolute_cutoff \t fractional_cutoff */
106 while (fscanf
107 (info->f_cutoffs, "%s\t\t%lf\t%lf\n", name, &abs_c,
108 &frac_c) == 3) {
109 if (frac_c > 1 || frac_c < 0) {
110 fprintf(stderr,
111 "WARNING: invalid fractional cutoff for %s matrix!\n ",
112 name);
113 info->error = CUTOFF_FILE_ERROR;
114 return;
116 int matrix_index = find_matrix_index(info, name);
117 if (matrix_index == -1) {
118 /* We issue a warning for unknown matrixes. */
119 fprintf(stderr,
120 "WARNING: matrix %s in the cutoff file is not in the matrix file, ignoring it\n",
121 name);
122 } else if (found[matrix_index]) {
123 /* We exit if a double matrix is found. */
124 fprintf(stderr,
125 "WARNING: matrix %s in the cutoff file is doubled!\n",
126 name);
127 info->error = CUTOFF_FILE_ERROR;
128 return;
129 } else {
130 c++;
131 found[matrix_index] = 1;
132 info->matrixes[matrix_index]->abs_cutoff = abs_c;
133 info->matrixes[matrix_index]->frac_cutoff = frac_c;
136 if (fgetc(info->f_cutoffs) != EOF) {
137 info->error = CUTOFF_FILE_ERROR;
138 return;
140 if (c != info->n_matrixes) {
141 fprintf(stderr,
142 "WARNING: Some matrixes are missing in the cutoff file:\n");
143 for (i = 0; i < info->n_matrixes; i++) {
144 if (!found[i]) {
145 fprintf(stderr, "%s\n",
146 info->matrixes[i]->name);
149 info->error = CUTOFF_FILE_ERROR;
150 return;
154 //XXX
155 int find_matrix_index(run info, char *name)
157 /* Non-duplicate matrix names is a prerequisite. */
158 /* Use an hash to be more efficient. */
159 int i = 0;
160 for (; i < info->n_matrixes; i++) {
161 if (!strcmp(info->matrixes[i]->name, name))
162 return i;
164 return -1;
168 Function: get_fractions_from_pcounts.
170 Get nucleotides frequencies for a given matrix_ll with counts or pseudocounts
171 stored in the freq array and store there the frequency.
172 If count evaluates true (default) 0 values are converted to 1 (pseudocount),
173 otherwise (perc to be implemented, TODO).
174 Issues an error in info->error if it finds a zero total for counts in the
175 given matrix_ll.
177 Parameters:
178 m - matrix_ll with matrixes data.
179 info - run with the desired count.
181 /* possible gain of speed getting ACGT total in get_matrixes */
182 void get_fractions_from_pcounts(matrix_ll m, run info)
184 double tot = 0.0;
185 int i, j = 0;
186 for (j = 0; j < m->length; j++) {
187 for (i = 0; i < BASES; i++) {
188 int val = (int)m->freq[j][i];
189 if (info->counts && val != m->freq[j][i]) { //then it's not an integer
190 info->error = MATRIX_COUNT_ERROR;
191 return;
193 if (m->freq[j][i] <= EEEPSILON && !info->keep_zeroes) {
194 if (info->counts) {
195 m->freq[j][i] = 1;
196 } else {
197 info->error = FREQ_ZERO_ERROR;
198 return;
201 tot += (int)m->freq[j][i];
204 if (info->counts) {
205 if (!tot) {
206 info->error = MATRIX_COUNT_ERROR;
207 return;
209 for (i = 0; i < BASES; i++) {
210 m->freq[j][i] = m->freq[j][i] / tot;
213 tot = 0.0;
218 Function: normalize_fractions
220 TODO
224 Function: assign_ll
226 Computes log-likelihood given a fasta with background informations and a
227 matrix_ll with freq loaded (counts already converted to fractions).
228 This function does not compute affinities, it's only a preprocess step.
230 Parameters:
231 m - matrix_ll with freq loaded and missing ll.
232 f - fasta with background informations to compute log likelihoods.
233 info - pointer to run struct to set ad error flag if there is a zero or
234 negative freq (or background)
236 void assign_ll(matrix_ll m, fasta f, run info)
238 int j = 0;
239 int i = 0;
240 for (; j < m->length; j++)
241 for (i = 0; i < BASES; i++) {
242 if (!info->miRNA)
243 m->ll[j][i] = log2_ratio(m->freq[j][i], f->background[i], info);
244 else
245 m->ll[j][i] = m->freq[j][i];
250 Function: assign_cutoff
252 Assign a cutoff to a matrix_ll using the fractional cutoff
253 in the given run. If given_abs_cutoff evaluates true it uses the
254 weakest cutoff (the smallest) between abs and fractional.
256 Parameters:
257 m - matrix_ll with ll loaded and missing cutoff.
258 info - run with cutoff informations.
260 void assign_cutoff(matrix_ll m, run info)
262 int j = 0;
263 int i = 0;
264 double max_tot = 0;
265 double max;
266 int mmmax;
267 for (; j < m->length; j++) {
268 max = m->ll[j][0];
269 for (i = 1; i < BASES; i++) {
270 if (m->ll[j][i] > max) {
271 max = m->ll[j][i];
272 mmmax=i;
275 max_tot += max;
278 /* If a file with cutoffs is given we will use them, otherwise we have default
279 or given parameters equals for every matrix. */
280 if (info->f_cutoffs) {
281 info->cutoff = m->frac_cutoff;
282 info->abs_cutoff = m->abs_cutoff;
284 m->cutoff = max_tot * info->cutoff;
286 if ((info->given_abs_cutoff || info->f_cutoffs)
287 && m->cutoff > info->abs_cutoff)
288 m->cutoff = info->abs_cutoff;
290 if (info->miRNA) { /* We count matches for miRNA seeking mode. */
291 m->cutoff = max_tot;
295 /* Function: log2_ratio
297 Returns the log2ratio or only the ratio (depending on info->log2),
298 sets error to 1 if the ratio is non-positive.
299 Computes log2ratio if info->log2 evaluates true, ratio otherwise.
301 Parameters:
302 n - double which will be used as numerator.
303 d - double which will be used as denominator.
304 info - run used to set error flags and determine if log has to be applied.
306 Return:
307 double which is the log2-ratio or simple ratio between n and d.
308 (will be 0 if an error occurred).
311 double log2_ratio(double n, double d, run info)
313 if (d < EPSILON) {
314 info->error = BACKGROUND_FREQ_ERROR;
315 return 0;
317 double ratio = n / d;
318 if (ratio <= 0 && !(ratio == 0 && info->keep_zeroes)) {
319 info->error = BACKGROUND_FREQ_ERROR;
320 return 0;
322 if (info->log2)
323 return log(ratio) / log(2);
324 else
325 return ratio;
329 Function: alloc_matrixes
331 Creates space (calloc) for all the matrixes that will be loaded.
333 Parameters:
334 m - matrix_ll ** where the matrixes will be stored.
336 Returns:
337 ERROR (1) if any calloc call failed, OK (0) otherwise.
339 int alloc_matrixes(matrix_ll ** m, int cache_size)
341 *m = (matrix_ll *) calloc(MAX_MATRIXES + 1, sizeof(matrix_ll));
342 if (!(*m)) {
343 free(*m);
344 return ERROR;
346 int i = 0;
347 for (i = 0; i < MAX_MATRIXES; i++) {
348 (*m)[i] = (matrix_ll) calloc(1, sizeof(struct matrix_ll_));
349 if (!((*m)[i])) {
350 free_matrixes(*m, i);
351 return ERROR;
353 (*m)[i]->ll =
354 (double **)calloc(MAX_MATRIX_LENGTH + 1, sizeof(double *));
355 if (!((*m)[i]->ll)) {
356 free_matrixes(*m, i);
357 return ERROR;
359 (*m)[i]->freq =
360 (double **)calloc(MAX_MATRIX_LENGTH + 1, sizeof(double *));
361 if (!((*m)[i]->freq)) {
362 free_matrixes(*m, i);
363 return ERROR;
365 (*m)[i]->cache = NULL;
366 if(cache_size!=0){
367 (*m)[i]->cache =
368 (double *)calloc(cache_size, sizeof(double));
369 if (!((*m)[i]->cache)) {
370 free_matrixes(*m, i);
371 return ERROR;
374 (*m)[i]->normaliz_cache = NULL;
375 if(cache_size!=0){
376 (*m)[i]->normaliz_cache =
377 (int *)calloc(cache_size, sizeof(double));
378 if (!((*m)[i]->normaliz_cache)) {
379 free_matrixes(*m, i);
380 return ERROR;
383 double **cur = NULL;
384 for (cur = (*m)[i]->ll; cur < (*m)[i]->ll + MAX_MATRIX_LENGTH;
385 cur++) {
386 *cur = (double *)calloc(BASES, sizeof(double));
387 if (!(*cur)) {
388 free_matrixes(*m, i);
389 return ERROR;
392 for (cur = (*m)[i]->freq;
393 cur < (*m)[i]->freq + MAX_MATRIX_LENGTH; cur++) {
394 *cur = (double *)calloc(BASES, sizeof(double));
395 if (!(*cur)) {
396 free_matrixes(*m, i);
397 return ERROR;
400 (*m)[i]->name = (char *)calloc(MAX_MATRIX_NAME, sizeof(char));
401 if (!((*m)[i]->name)) {
402 free_matrixes(*m, i);
403 return ERROR;
406 return OK;
410 Function: free_matrixes
412 Frees the space used by all the loaded matrixes.
414 Parameters:
416 m - matrix_ll * with the matrixes to be freed.
417 loaded - number of matrixes to be freed.
419 void free_matrixes(matrix_ll * m, int loaded)
421 int i = 0;
422 for (i = 0; i < loaded; i++) {
423 free(m[i]->name);
424 double **cur = NULL;
425 for (cur = m[i]->ll; cur < m[i]->ll + MAX_MATRIX_LENGTH; cur++)
426 free(*cur);
427 free(m[i]->ll);
428 for (cur = m[i]->freq; cur < m[i]->freq + MAX_MATRIX_LENGTH; cur++)
429 free(*cur);
430 free(m[i]->freq);
431 free(m[i]->cache);
432 free(m[i]->normaliz_cache);
433 free(m[i]);
435 free(m);
439 Function: get_affinity
441 Computes affinity between a matrix and a slice of a string, given the first char to be evaluated.
442 Assumes enough space in the string. Returns total values ("straight" + revcomp).
444 Parameters:
446 m - matrix_ll matrix that will be used.
447 s - char * versus which matrix will be compared.
448 start - int first position for affinity computation.
450 Returns:
452 double with the total affinity (sum of all log_likelihoods) between
453 the given matrix and string.
455 double get_affinity(matrix_ll m, char *s, int start)
457 double tot = 0;
458 int i = 0;
459 while (i < m->length) {
460 /* switch(toupper(s[i])) { //toupper done while loading fasta for efficiency's sake */
461 switch (s[start]) {
462 case 'A':
463 tot += m->ll[i][A] + m->ll[(m->length) - i - 1][T];
464 break;
465 case 'C':
466 tot += m->ll[i][C] + m->ll[(m->length) - i - 1][G];
467 break;
468 case 'G':
469 tot += m->ll[i][G] + m->ll[(m->length) - i - 1][C];
470 break;
471 case 'T':
472 tot += m->ll[i][T] + m->ll[(m->length) - i - 1][A];
473 break;
474 case 'N':
475 tot += 2 * N;
476 break;
478 i++;
479 start++;
481 return tot;
485 Function: get_affinities
487 Computes affinity between a matrix and a slice of a string, given the first char to be evaluated.
488 Assumes enough space in the string. Puts results in the given double array ("straight" and revcomp
489 are evaluated at the same time).
491 Parameters:
493 m - matrix_ll matrix that will be used.
494 s - char * versus which matrix will be compared.
495 start - int first position for affinity computation.
496 results - double * where affinities will be put.
498 void get_affinities(matrix_ll m, char *s, int start, double *results)
500 /* results[0] is straight total, results[1] revcomp */
501 int i = 0;
502 results[0] = 0;
503 results[1] = 0;
504 while (i < m->length) {
505 switch (s[start]) {
506 case 'A':
507 results[0] += m->ll[i][A];
508 results[1] += m->ll[(m->length) - i - 1][T];
509 break;
510 case 'C':
511 results[0] += m->ll[i][C];
512 results[1] += m->ll[(m->length) - i - 1][G];
513 break;
514 case 'G':
515 results[0] += m->ll[i][G];
516 results[1] += m->ll[(m->length) - i - 1][C];
517 break;
518 case 'T':
519 results[0] += m->ll[i][T];
520 results[1] += m->ll[(m->length) - i - 1][A];
521 break;
522 case 'N':
523 results[0] += N;
524 results[1] += N;
525 break;
527 i++;
528 start++;
532 //XXX
533 double get_affinities_nonLog(matrix_ll m, char *s, int start, int *foundN, run info)
535 /* results[0] is straight total, results[1] revcomp */
536 int i = 0;
537 double results[2];
538 results[0] = 1;
539 results[1] = 1;
540 while (i < m->length) {
541 switch (s[start]) {
542 case 'A':
543 results[0] *= m->ll[i][A];
544 results[1] *= m->ll[(m->length) - i - 1][T];
545 break;
546 case 'C':
547 results[0] *= m->ll[i][C];
548 results[1] *= m->ll[(m->length) - i - 1][G];
549 break;
550 case 'G':
551 results[0] *= m->ll[i][G];
552 results[1] *= m->ll[(m->length) - i - 1][C];
553 break;
554 case 'T':
555 results[0] *= m->ll[i][T];
556 results[1] *= m->ll[(m->length) - i - 1][A];
557 break;
558 case 'N':
559 results[0] *= N;
560 results[1] *= N;
561 (*foundN)++;
562 break;
564 i++;
565 start++;
567 if (info->single_strand)
568 return results[0];
569 else
570 return (results[0] > results[1]) ? results[0] : results[1];
573 //XXX
574 double get_affinities_single_strand(matrix_ll m, char *s, int start, int *foundN, run info)
576 double res = 0.0;
577 int i = 0;
578 while (i < m->length) {
579 switch (s[start]) {
580 case 'A':
581 res += m->ll[i][A];
582 break;
583 case 'C':
584 res += m->ll[i][C];
585 break;
586 case 'G':
587 res += m->ll[i][G];
588 break;
589 case 'T':
590 res += m->ll[i][T];
591 break;
592 case 'N':
593 (*foundN)++;
594 res += N;
595 break;
597 i++;
598 start++;
600 if (fabs(res - m->cutoff) <= EEEPSILON)
601 return 1;
602 else
603 return 0;
607 Function: matrix_run
609 Computes affinity between a matrix and a string on all viable positions.
611 Parameters:
613 m - matrix_ll matrix that will be used.
614 s - fasta versus which matrix will be compared.
615 how_many - int pointer where the number of viable positions will be put.
616 info - run to issue error flags (memory error or fasta too short).
618 Returns:
620 double array which stores total affinity of the matrix for each viable
621 position on the given string (starts at 0, +1 and evaluates it until there
622 are enough chars left in the given string). NULL if a memory error occurs or
623 the fasta is too short to find the given matrix.
625 double *matrix_run(matrix_ll m, fasta s, int *how_many, run info)
627 #ifdef DEBUG
628 printf("matrix_run with %s %d\n", m->name, m->length);
629 #endif /* DEBUG */
630 *how_many = s->length - m->length + 1;
631 if (*how_many <= 0) {
632 info->error = SHORT_FASTA_WARNING;
633 return NULL;
635 double *res = (double *)calloc(*how_many, sizeof(double));
636 if (!res) {
637 info->error = MEMORY_ERROR;
638 return NULL;
640 int offset = 0;
641 while (m->length <= s->length - offset) {
642 int dummy = 0;
643 res[offset] = info->get_affinities_pointer(m, s->seq, offset, &dummy, info);
644 #ifdef DEBUG
645 printf("Seen affinity %g at offset %d\n", res[offset], offset);
646 #endif /* DEBUG */
647 offset++;
649 #ifdef DEBUG
650 printf("matrix_run %d %d\n", offset, *how_many);
651 #endif /* DEBUG */
652 assert(offset == *how_many);
653 return res;
656 //XXX
658 Function: matrix_little_window_print_tot
660 Computes affinity between a matrix and a string on all viable positions and
661 returns the sum of all affinities.
663 Parameters:
665 m - matrix_ll matrix that will be used.
666 seq - sequence versus which matrix will be compared.
667 s_length - the
670 Returns:
672 double with total affinity between the given matrix and fasta,
673 normalized upon the number of matrix matches != 0 (i.e. without an N).
675 double matrix_little_window_tot(matrix_ll m, fasta f, int begin, int end, int *tot_match_noN, run info)
677 int offset = 0;
678 char *seq = f->seq;
679 int l = f->length;
680 if(begin > 0){
681 if(begin > l){
682 fprintf(stderr, "ERROR: invalid begin in matrix_little_window_tot()\n");
683 exit(1);
685 offset = begin;
687 if(end > 0){
688 if(end > l){
689 fprintf(stderr, "ERROR: invalid end in matrix_little_window_tot()\n");
690 exit(1);
692 }else{
693 end = l;
696 double tot = 0;
697 (*tot_match_noN) = 0;
698 while (offset <= end - m->length) {
699 int foundN = 0;
700 double match = get_affinities_nonLog(m, seq, offset, &foundN, info);
701 tot += match;
702 //if (!match) { //now we directly count the Ns
703 if (!foundN) {
704 (*tot_match_noN)++;
706 offset++;
708 return tot;
712 Function: print_window_affinities
714 Prints (on stdout) total affinities for all possible windows, getting infos from a
715 double array which has affinities for a matrix computed for all positions.
717 Parameters:
719 tot_aff - double * with the computed affinities for all positions.
720 window - int with the wanted window size.
721 tot - length of tot_aff array.
722 slide - number of bases to be skipped between two outputs.
724 void print_window_affinities(double *tot_aff, int window, int tot, int slide)
726 /*int k = 0;
727 for (; k < tot; k++)
728 printf("%d: %g\n", k, tot_aff[k]);
729 printf("Using window %d tot %d\n", window, tot); */
730 double tot_w = 0;
731 int tot_match_noN = 0;
732 int i = 0;
733 int w = 0;
734 int cw = 0;
735 while (i < tot) {
736 if (w < window) {
737 //printf("For window %d at %d adding %g\n", cw, i, tot_aff[i]);
738 if (tot_aff[i]) {
739 tot_w += tot_aff[i];
740 tot_match_noN++;
742 w++;
743 i++;
744 } else {
745 printf("%d\t%g\n", cw++, tot_w / tot_match_noN);
746 if (slide < window) {
747 int j;
748 for (j = (i - window); j < (i + slide - window);
749 j++) {
750 //printf("For window %d at %d removing %g\n", cw, j, tot_aff[j]);
751 if (tot_aff[j]) {
752 tot_w -= tot_aff[j];
753 tot_match_noN--;
756 w = window - slide;
757 } else {
758 tot_w = 0;
759 tot_match_noN = 0;
760 w = 0;
761 i += slide - window;
762 //printf("Starting for a long jump after w.%d, to %d\n", cw, i);
766 if (w == window)
767 printf("%d\t%g\n", cw++, tot_w / tot_match_noN);
772 Function: matrix_cutoff_print
774 Computes affinity between a matrix and a string on all viable positions,
775 compares with the cutoff and prints the result if they are greater.
777 Parameters:
779 m - matrix_ll matrix that will be used.
780 s - fasta versus which matrix will be compared.
781 info - run with cutoff record that says if we have
782 to print the matched revcomp (1) or straight seq (0).
784 void matrix_cutoff_print(matrix_ll m, fasta s, run info)
786 #ifdef DEBUG
787 printf("matrix_cutoff_print with %s %d\n", m->name, m->length);
788 #endif /* DEBUG */
789 int offset = 0;
790 int max_strand = 1; /* 1 is +, 0 is - */
791 int max_pos = 0;
792 int seen_any_match = 0;
793 char *plus = "+";
794 char *minus = "-";
795 char *match = (char *)calloc((m->length) + 1, sizeof(char));
796 char *max_match = (char *)calloc((m->length) + 1, sizeof(char));
797 if (!match || !max_match) {
798 info->error = MEMORY_ERROR;
799 return;
801 double affinities[2];
802 double max_aff = 0;
803 while (m->length <= s->length - offset) {
804 seen_any_match = 1;
805 get_affinities(m, s->seq, offset, affinities);
806 if (info->only_max) {
807 if (!offset)
808 max_aff = affinities[0];
809 double max_local = affinities[0];
810 int max_strand_local = 1;
811 if (affinities[1] > affinities[0]) {
812 max_local = affinities[1];
813 max_strand_local = 0;
815 /* remembers only last higher match */
816 if (max_aff <= max_local) {
817 max_aff = max_local;
818 max_pos = offset;
819 max_strand = max_strand_local;
820 if (info->revcomp)
821 get_rc(s->seq, offset, m->length,
822 max_match);
823 else
824 get_seq(s->seq, offset, m->length,
825 max_match);
827 } else {
828 if (affinities[0] >= m->cutoff) {
829 get_seq(s->seq, offset, m->length, match);
830 printf("%s\t%s\t%g\t%d\t%s\n", m->name, match,
831 affinities[0], offset, plus);
833 #ifdef DEBUG
834 else
835 printf("BELOW CUTOFF %g %s\t%g\t%d\t%s\n",
836 m->cutoff, m->name, affinities[0],
837 offset, plus);
838 #endif /* DEBUG */
839 if (affinities[1] >= m->cutoff) {
840 if (info->revcomp)
841 get_rc(s->seq, offset, m->length,
842 match);
843 else /* very unlikely to have match already there for a plus match for the same matrix? XXX */
844 get_seq(s->seq, offset, m->length,
845 match);
846 printf("%s\t%s\t%g\t%d\t%s\n", m->name, match,
847 affinities[1], offset, minus);
849 #ifdef DEBUG
850 else
851 printf("BELOW CUTOFF %g %s\t%g\t%d\t%s\n",
852 m->cutoff, m->name, affinities[1],
853 offset, minus);
854 #endif /* DEBUG */
856 offset++;
858 if (info->only_max && seen_any_match) {
859 if (max_strand)
860 printf("%s\t%s\t%g\t%d\t%s\n", m->name, max_match,
861 max_aff, max_pos, plus);
862 else
863 printf("%s\t%s\t%g\t%d\t%s\n", m->name, max_match,
864 max_aff, max_pos, minus);
866 free(match);
867 free(max_match);
868 #ifdef DEBUG
869 printf("matrix_run %d\n", offset);
870 #endif /* DEBUG */
873 //XXX
874 void find_single_strand_matches(matrix_ll m, fasta s, run info)
876 int offset = 0;
877 char *match = (char *)calloc((m->length) + 1, sizeof(char));
878 if (!match) {
879 info->error = MEMORY_ERROR;
880 return;
882 while (m->length <= s->length - offset) {
883 int dummy = 0;
884 int found = (int) info->get_affinities_pointer(m, s->seq, offset, &dummy, info);
885 if (found) {
886 get_seq(s->seq, offset, m->length, match); // XXX remove only for testing purposes, inefficient
887 printf("%s\t%s\t%d\n", m->name, match, offset);
889 offset++;
891 free(match);