protect mysql tests using mysql_enabled()
[pygr.git] / pygr / intervaldb.c
blob82bf008b4a58e4577dda4dc34cb6ae4694c647dc
2 #include "intervaldb.h"
4 int C_int_max=INT_MAX; /* KLUDGE TO LET PYREX CODE ACCESS VALUE OF INT_MAX MACRO */
6 IntervalMap *read_intervals(int n,FILE *ifile)
8 int i=0;
9 IntervalMap *im=NULL;
10 CALLOC(im,n,IntervalMap); /* ALLOCATE THE WHOLE ARRAY */
11 while (i<n && fscanf(ifile," %d %d %d %d %d",&im[i].start,&im[i].end,
12 &im[i].target_id,&im[i].target_start,
13 &im[i].target_end)==5) {
14 im[i].sublist= -1; /* DEFAULT: NO SUBLIST */
15 i++;
17 if (i!=n) {
18 fprintf(stderr,"WARNING: number of records read %d does not match allocation %d\n",i,n);
20 return im;
21 handle_malloc_failure:
22 return NULL; /* NO CLEANUP ACTIONS REQUIRED */
30 int imstart_qsort_cmp(const void *void_a,const void *void_b)
31 { /* STRAIGHTFORWARD COMPARISON OF SIGNED start VALUES, LONGER INTERVALS 1ST */
32 IntervalMap *a=(IntervalMap *)void_a,*b=(IntervalMap *)void_b;
33 if (a->start<b->start)
34 return -1;
35 else if (a->start>b->start)
36 return 1;
37 else if (a->end>b->end) /* SAME START: PUT LONGER INTERVAL 1ST */
38 return -1;
39 else if (a->end<b->end) /* CONTAINED INTERVAL SHOULD FOLLOW LARGER INTERVAL*/
40 return 1;
41 else
42 return 0;
48 #ifdef MERGE_INTERVAL_ORIENTATIONS
49 int im_qsort_cmp(const void *void_a,const void *void_b)
50 { /* MERGE FORWARD AND REVERSE INTERVALS AS IF THEY WERE ALL IN FORWARD ORI */
51 int a_start,a_end,b_start,b_end;
52 IntervalMap *a=(IntervalMap *)void_a,*b=(IntervalMap *)void_b;
53 SET_INTERVAL_POSITIVE(*a,a_start,a_end);
54 SET_INTERVAL_POSITIVE(*b,b_start,b_end);
55 if (a_start<b_start)
56 return -1;
57 else if (a_start>b_start)
58 return 1;
59 else if (a_end>b_end) /* SAME START: PUT LONGER INTERVAL 1ST */
60 return -1;
61 else if (a_end<b_end) /* CONTAINED INTERVAL SHOULD FOLLOW LARGER INTERVAL*/
62 return 1;
63 else
64 return 0;
66 #endif
68 int sublist_qsort_cmp(const void *void_a,const void *void_b)
69 { /* SORT IN SUBLIST ORDER, SECONDARILY BY start */
70 IntervalMap *a=(IntervalMap *)void_a,*b=(IntervalMap *)void_b;
71 if (a->sublist<b->sublist)
72 return -1;
73 else if (a->sublist>b->sublist)
74 return 1;
75 else if (START_POSITIVE(*a) < START_POSITIVE(*b))
76 return -1;
77 else if (START_POSITIVE(*a) > START_POSITIVE(*b))
78 return 1;
79 else
80 return 0;
85 int target_qsort_cmp(const void *void_a,const void *void_b)
86 { /* SORT IN target_id ORDER, SECONDARILY BY target_start */
87 IntervalMap *a=(IntervalMap *)void_a,*b=(IntervalMap *)void_b;
88 if (a->target_id<b->target_id)
89 return -1;
90 else if (a->target_id>b->target_id)
91 return 1;
92 else if (a->target_start < b->target_start)
93 return -1;
94 else if (a->target_start > b->target_start)
95 return 1;
96 else
97 return 0;
101 SublistHeader *build_nested_list_inplace(IntervalMap im[],int n,
102 int *p_n,int *p_nlists)
104 int i=0,parent,nlists=1,isublist=0,total=0,temp=0;
105 SublistHeader *subheader=NULL;
107 #ifdef ALL_POSITIVE_ORIENTATION
108 reorient_intervals(n,im,1); /* FORCE ALL INTERVALS INTO POSITIVE ORI */
109 #endif
110 #ifdef MERGE_INTERVAL_ORIENTATIONS
111 qsort(im,n,sizeof(IntervalMap),im_qsort_cmp); /* SORT BY start, CONTAINMENT */
112 #else
113 qsort(im,n,sizeof(IntervalMap),imstart_qsort_cmp); /* SORT BY start, CONTAINMENT */
114 #endif
115 nlists=1;
116 for(i=1;i<n;++i){
117 if(!(END_POSITIVE(im[i])>END_POSITIVE(im[i-1]) /* i NOT CONTAINED */
118 || (END_POSITIVE(im[i])==END_POSITIVE(im[i-1]) /* SAME INTERVAL! */
119 && START_POSITIVE(im[i])==START_POSITIVE(im[i-1])))){
120 nlists++;
121 /* printf("%d (%d,%d) -> (%d,%d) %d\n", nlists, im[i-1].start, */
122 /* im[i-1].end, im[i].start,im[i].end,i); */
126 /* printf("%d lists?!\n", nlists); */
127 *p_nlists=nlists-1;
129 if(nlists==1){
130 *p_n=n;
131 CALLOC(subheader,1,SublistHeader); /* RETURN A DUMMY ARRAY, SINCE NULL RETURN IS ERROR CODE */
132 return subheader;
135 CALLOC(subheader,nlists+1,SublistHeader); /* SUBLIST HEADER INDEX */
137 im[0].sublist=0;
138 subheader[0].start= -1;
139 subheader[0].len=1;
140 parent=0;
141 nlists=1;
142 isublist=1;
143 for(i=1;i<n;){
144 if(isublist && (END_POSITIVE(im[i])>END_POSITIVE(im[parent]) /* i NOT CONTAINED */
145 || (END_POSITIVE(im[i])==END_POSITIVE(im[parent]) /* SAME INTERVAL! */
146 && START_POSITIVE(im[i])==START_POSITIVE(im[parent])))){
147 subheader[isublist].start=subheader[im[parent].sublist].len-1; /* RECORD PARENT RELATIVE POSITION */
148 isublist=im[parent].sublist;
149 parent=subheader[im[parent].sublist].start;
151 else{
152 if(subheader[isublist].len==0){
153 nlists++;
155 subheader[isublist].len++;
156 im[i].sublist=isublist;
157 parent=i;
158 isublist=nlists;
159 subheader[isublist].start=parent;
160 i++;
164 while(isublist>0){ /* pop remaining stack */
165 subheader[isublist].start=subheader[im[parent].sublist].len-1; /* RECORD PARENT RELATIVE POSITION */
166 isublist=im[parent].sublist;
167 parent=subheader[im[parent].sublist].start;
170 *p_n=subheader[0].len;
172 total=0;
173 for(i=0;i<nlists+1;++i){
174 temp=subheader[i].len;
175 subheader[i].len=total;
176 total+=temp;
179 /* SUBHEADER.LEN IS NOW START OF THE SUBLIST */
181 for(i=1;i<n;i+=1){
182 if(im[i].sublist>im[i-1].sublist){
183 subheader[im[i].sublist].start+=subheader[im[i-1].sublist].len;
187 /* SUBHEADER.START IS NOW ABS POSITION OF PARENT */
189 qsort(im,n,sizeof(IntervalMap),sublist_qsort_cmp);
190 /* AT THIS POINT SUBLISTS ARE GROUPED TOGETHER, READY TO PACK */
192 isublist=0;
193 subheader[0].start=0;
194 subheader[0].len=0;
195 for(i=0;i<n;++i){
196 if(im[i].sublist>isublist){
197 /* printf("Entering sublist %d (%d,%d)\n", im[i].sublist, im[i].start,im[i].end); */
198 isublist=im[i].sublist;
199 parent=subheader[isublist].start;
200 /* printf("Parent (%d,%d) is at %d, list start is at %d\n", */
201 /* im[parent].start, im[parent].end, subheader[isublist].start,i); */
202 im[parent].sublist=isublist-1;
203 subheader[isublist].len=0;
204 subheader[isublist].start=i;
206 subheader[isublist].len++;
207 im[i].sublist= -1;
210 nlists--;
211 memmove(subheader,subheader+1,nlists*sizeof(SublistHeader));
213 return subheader;
214 handle_malloc_failure:
215 /* FREE ANY MALLOCS WE PERFORMED*/
216 FREE(subheader);
217 return NULL;
222 SublistHeader *build_nested_list(IntervalMap im[],int n,
223 int *p_n,int *p_nlists)
225 int i=0,j,k,parent,nsub=0,nlists=0;
226 IntervalMap *imsub=NULL;
227 SublistHeader *subheader=NULL;
229 #ifdef ALL_POSITIVE_ORIENTATION
230 reorient_intervals(n,im,1); /* FORCE ALL INTERVALS INTO POSITIVE ORI */
231 #endif
232 #ifdef MERGE_INTERVAL_ORIENTATIONS
233 qsort(im,n,sizeof(IntervalMap),im_qsort_cmp); /* SORT BY start, CONTAINMENT */
234 #else
235 qsort(im,n,sizeof(IntervalMap),imstart_qsort_cmp); /* SORT BY start, CONTAINMENT */
236 #endif
237 while (i<n) { /* TOP LEVEL LIST SCAN */
238 parent=i;
239 i=parent+1;
240 while (i<n && parent>=0) { /* RECURSIVE ALGORITHM OF ALEX ALEKSEYENKO */
241 if (END_POSITIVE(im[i])>END_POSITIVE(im[parent]) /* i NOT CONTAINED */
242 || (END_POSITIVE(im[i])==END_POSITIVE(im[parent]) /* SAME INTERVAL! */
243 && START_POSITIVE(im[i])==START_POSITIVE(im[parent])))
244 parent=im[parent].sublist; /* POP RECURSIVE STACK*/
245 else { /* i CONTAINED IN parent*/
246 im[i].sublist=parent; /* MARK AS CONTAINED IN parent */
247 nsub++; /* COUNT TOTAL #SUBLIST ENTRIES */
248 parent=i; /* AND PUSH ONTO RECURSIVE STACK */
249 i++; /* ADVANCE TO NEXT INTERVAL */
252 } /* AT THIS POINT sublist IS EITHER -1 IF NOT IN SUBLIST, OR INDICATES parent*/
254 if (nsub>0) { /* WE HAVE SUBLISTS TO PROCESS */
255 CALLOC(imsub,nsub,IntervalMap); /* TEMPORARY ARRAY FOR REPACKING SUBLISTS */
256 for (i=j=0;i<n;i++) { /* GENERATE LIST FOR SORTING; ASSIGN HEADER INDEXES*/
257 parent=im[i].sublist;
258 if (parent>=0) {/* IN A SUBLIST */
259 imsub[j].start=i;
260 imsub[j].sublist=parent;
261 j++;
262 if (im[parent].sublist<0) /* A NEW PARENT! SET HIS SUBLIST HEADER INDEX */
263 im[parent].sublist=nlists++;
265 im[i].sublist= -1; /* RESET TO DEFAULT VALUE: NO SUBLIST */
267 qsort(imsub,nsub,sizeof(IntervalMap),sublist_qsort_cmp);
268 /* AT THIS POINT SUBLISTS ARE GROUPED TOGETHER, READY TO PACK */
270 CALLOC(subheader,nlists,SublistHeader); /* SUBLIST HEADER INDEX */
271 for (i=0;i<nsub;i++) { /* COPY SUBLIST ENTRIES TO imsub */
272 j=imsub[i].start;
273 parent=imsub[i].sublist;
274 memcpy(imsub+i,im+j,sizeof(IntervalMap)); /* COPY INTERVAL */
275 k=im[parent].sublist;
276 if (subheader[k].len==0) /* START A NEW SUBLIST */
277 subheader[k].start=i;
278 subheader[k].len++; /* COUNT THE SUBLIST ENTRIES */
279 im[j].start=im[j].end= -1; /* MARK FOR DELETION */
280 } /* DONE COPYING ALL SUBLISTS TO imsub */
282 for (i=j=0;i<n;i++) /* COMPRESS THE LIST TO REMOVE SUBLISTS */
283 if (im[i].start!= -1 || im[i].end!= -1) { /* NOT IN A SUBLIST, SO KEEP */
284 if (j<i) /* COPY TO NEW COMPACTED LOCATION */
285 memcpy(im+j,im+i,sizeof(IntervalMap));
286 j++;
289 memcpy(im+j,imsub,nsub*sizeof(IntervalMap)); /* COPY THE SUBLISTS */
290 for (i=0;i<nlists;i++) /* ADJUST start ADDRESSES FOR SHIFT*/
291 subheader[i].start += j;
292 FREE(imsub);
293 *p_n = j; /* COPY THE COMPRESSED LIST SIZES BACK TO CALLER*/
295 else { /* NO SUBLISTS: HANDLE THIS CASE CAREFULLY */
296 *p_n = n;
297 CALLOC(subheader,1,SublistHeader); /* RETURN A DUMMY ARRAY, SINCE NULL RETURN IS ERROR CODE */
299 *p_nlists=nlists; /* RETURN COUNT OF NUMBER OF SUBLISTS */
300 return subheader;
301 handle_malloc_failure:
302 FREE(imsub); /* FREE ANY MALLOCS WE PERFORMED*/
303 FREE(subheader);
304 return NULL;
308 IntervalMap *interval_map_alloc(int n)
310 IntervalMap *im=NULL;
311 CALLOC(im,n,IntervalMap);
312 return im;
313 handle_malloc_failure:
314 return NULL;
319 int find_overlap_start(int start,int end,IntervalMap im[],int n)
321 int l=0,mid,r;
323 r=n-1;
324 while (l<r) {
325 mid=(l+r)/2;
326 if (END_POSITIVE(im[mid])<=start)
327 l=mid+1;
328 else
329 r=mid;
331 if (l<n && HAS_OVERLAP_POSITIVE(im[l],start,end))
332 return l; /* l IS START OF OVERLAP */
333 else
334 return -1; /* NO OVERLAP FOUND */
340 int find_index_start(int start,int end,IntervalIndex im[],int n)
342 int l=0,mid,r;
344 r=n-1;
345 while (l<r) {
346 mid=(l+r)/2;
347 if (END_POSITIVE(im[mid])<=start)
348 l=mid+1;
349 else
350 r=mid;
352 return l; /* l IS START OF POSSIBLE OVERLAP */
357 int find_suboverlap_start(int start,int end,int isub,IntervalMap im[],
358 SublistHeader subheader[],int nlists)
360 int i;
362 if (isub>=0) {
363 i=find_overlap_start(start,end,im+subheader[isub].start,subheader[isub].len);
364 if (i>=0)
365 return i+subheader[isub].start;
367 return -1;
371 IntervalIterator *interval_iterator_alloc(void)
373 IntervalIterator *it=NULL;
374 CALLOC(it,1,IntervalIterator);
375 return it;
376 handle_malloc_failure:
377 return NULL;
380 int free_interval_iterator(IntervalIterator *it)
382 IntervalIterator *it2,*it_next;
383 if (!it)
384 return 0;
385 FREE_ITERATOR_STACK(it,it2,it_next);
386 return 0;
390 IntervalIterator *reset_interval_iterator(IntervalIterator *it)
392 ITERATOR_STACK_TOP(it);
393 it->n=0;
394 return it;
398 void reorient_intervals(int n,IntervalMap im[],int ori_sign)
400 int i,tmp;
401 for (i=0;i<n;i++) {
402 if ((im[i].start>=0 ? 1:-1)!=ori_sign) { /* ORIENTATION MISMATCH */
403 tmp=im[i].start; /* SO REVERSE THIS INTERVAL MAPPING */
404 im[i].start= -im[i].end;
405 im[i].end = -tmp;
406 tmp=im[i].target_start;
407 im[i].target_start= -im[i].target_end;
408 im[i].target_end = -tmp;
413 int find_intervals(IntervalIterator *it0,int start,int end,
414 IntervalMap im[],int n,
415 SublistHeader subheader[],int nlists,
416 IntervalMap buf[],int nbuf,
417 int *p_nreturn,IntervalIterator **it_return)
419 IntervalIterator *it=NULL,*it2=NULL;
420 int ibuf=0,j,k,ori_sign=1;
421 if (!it0) { /* ALLOCATE AN ITERATOR IF NOT SUPPLIED*/
422 CALLOC(it,1,IntervalIterator);
424 else
425 it=it0;
427 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
428 if (start<0) { /* NEED TO CONVERT TO POSITIVE ORIENTATION */
429 j=start;
430 start= -end;
431 end= -j;
432 ori_sign = -1;
434 #endif
435 if (it->n == 0) { /* DEFAULT: SEARCH THE TOP NESTED LIST */
436 it->n=n;
437 it->i=find_overlap_start(start,end,im,n);
439 do {
440 while (it->i>=0 && it->i<it->n && HAS_OVERLAP_POSITIVE(im[it->i],start,end)) {
441 memcpy(buf+ibuf,im + it->i,sizeof(IntervalMap)); /*SAVE THIS HIT TO BUFFER */
442 ibuf++;
443 k=im[it->i].sublist; /* GET SUBLIST OF i IF ANY */
444 it->i++; /* ADVANCE TO NEXT INTERVAL */
445 if (k>=0 && (j=find_suboverlap_start(start,end,k,im,subheader,nlists))>=0) {
446 PUSH_ITERATOR_STACK(it,it2,IntervalIterator); /* RECURSE TO SUBLIST */
447 it2->i = j; /* START OF OVERLAPPING HITS IN THIS SUBLIST */
448 it2->n = subheader[k].start+subheader[k].len; /* END OF SUBLIST */
449 it=it2; /* PUSH THE ITERATOR STACK */
451 if (ibuf>=nbuf) /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
452 goto finally_return_result;
454 } while (POP_ITERATOR_STACK(it)); /* IF STACK EXHAUSTED, EXIT */
455 if (!it0) /* FREE THE ITERATOR WE CREATED. NO NEED TO RETURN IT TO USER */
456 free_interval_iterator(it);
457 it=NULL; /* ITERATOR IS EXHAUSTED */
459 finally_return_result:
460 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
461 reorient_intervals(ibuf,buf,ori_sign); /* REORIENT INTERVALS TO MATCH QUERY ORI */
462 #endif
463 *p_nreturn=ibuf; /* #INTERVALS FOUND IN THIS PASS */
464 *it_return=it; /* HAND BACK ITERATOR FOR CONTINUING THE SEARCH, IF ANY */
465 return 0; /* SIGNAL THAT NO ERROR OCCURRED */
466 handle_malloc_failure:
467 return -1;
474 /****************************************************************
476 * FILE-BASED SEARCH FUNCTIONS
480 /* READ A BLOCK FROM THE DATABASE FILE */
481 int read_imdiv(FILE *ifile,IntervalMap imdiv[],int div,int i_div,int ntop)
483 int block;
484 PYGR_OFF_T ipos;
485 ipos=div*i_div; /* CALCULATE POSITION IN RECORDS */
486 if (ipos+div<=ntop) /* GET A WHOLE BLOCK */
487 block=div;
488 else /* JUST READ PARTIAL BLOCK AT END */
489 block=ntop%div;
490 ipos *= sizeof(IntervalMap); /* CALCULATE FILE POSITION IN BYTES */
491 PYGR_FSEEK(ifile,ipos,SEEK_SET);
492 fread(imdiv,sizeof(IntervalMap),block,ifile);
493 return block;
497 /* READ A SUBLIST FROM DATABASE FILE */
498 IntervalMap *read_sublist(FILE *ifile,SublistHeader *subheader,
499 IntervalMap *im)
501 PYGR_OFF_T ipos;
502 if (im==NULL) {
503 CALLOC(im,subheader->len,IntervalMap);
505 ipos=subheader->start; /* CALCULATE POSITION IN RECORDS */
506 ipos*=sizeof(IntervalMap); /* CALCULATE FILE POSITION IN BYTES */
507 PYGR_FSEEK(ifile,ipos,SEEK_SET);
508 fread(im,sizeof(IntervalMap),subheader->len,ifile);
509 return im;
510 handle_malloc_failure:
511 return NULL;
515 /* READ A BLOCK OF THE SUBLIST HEADER FILE */
516 int read_subheader_block(SublistHeader subheader[],int isub,int nblock,
517 int nsubheader,FILE *ifile)
519 PYGR_OFF_T ipos;
520 long start;
521 start=isub-(isub%nblock); /* GET BLOCK START */
522 if (start+nblock>nsubheader)
523 nblock=nsubheader-start; /* TRUNCATE TO FIT MAX FILE LENGTH */
524 ipos=start; /* CONVERT TO off_t TYPE */
525 ipos *= sizeof(SublistHeader); /* CALCULATE ACTUAL BYTE OFFSET */
526 PYGR_FSEEK(ifile,ipos,SEEK_SET);
527 fread(subheader,sizeof(SublistHeader),nblock,ifile);
528 return start;
534 int find_file_start(IntervalIterator *it,int start,int end,int isub,
535 IntervalIndex ii[],int nii,
536 SublistHeader *subheader,int nlists,
537 SubheaderFile *subheader_file,
538 int ntop,int div,FILE *ifile)
540 int i_div= -1,offset=0,offset_div=0;
541 if (isub<0) /* TOP-LEVEL SEARCH: USE THE INDEX */
542 i_div=find_index_start(start,end,ii,nii);
543 else { /* GET PTR TO subheader[isub] */
544 #ifdef ON_DEMAND_SUBLIST_HEADER
545 if (isub<subheader_file->start /* isub OUTSIDE OUR CURRENT BLOCK */
546 || isub>=subheader_file->start+subheader_file->nblock)
547 subheader_file->start= /* LOAD NEW BLOCK FROM DISK */
548 read_subheader_block(subheader_file->subheader,isub,
549 subheader_file->nblock,nlists,
550 subheader_file->ifile);
551 subheader=subheader_file->subheader + (isub-subheader_file->start);
552 #else
553 subheader += isub; /* POINT TO OUR SUBHEADER */
554 #endif
555 if (subheader->len>div) { /* BIG SUBLIST, SO USE THE INDEX */
556 offset=subheader->start;
557 offset_div=offset/div;/* offset GUARANTEED TO BE MULTIPLE OF div */
558 ntop=subheader->len;
559 nii=ntop/div; /* CALCULATE SUBLIST INDEX SIZE */
560 if (ntop%div) /* ONE EXTRA ENTRY FOR PARTIAL BLOCK */
561 nii++;
562 i_div=find_index_start(start,end,ii+offset_div,nii);
566 if (!it->im) { /* NO ALLOCATION? ALLOCATE OUR BLOCK SIZE div */
567 CALLOC(it->im,div,IntervalMap); /* ALWAYS ALLOCATE div BUFFERSIZE */
569 if (i_div>=0) { /* READ A SPECIFIC BLOCK OF SIZE div */
570 it->n=read_imdiv(ifile,it->im,div,i_div+offset_div,ntop+offset);
571 it->ntop=ntop+offset; /* END OF THIS LIST IN THE BINARY FILE */
572 it->nii=nii+offset_div; /* SAVE INFORMATION FOR READING SUBSEQUENT BLOCKS */
573 it->i_div=i_div+offset_div; /* INDEX OF THIS BLOCK IN THE BINARY FILE */
575 else { /* A SMALL SUBLIST: READ THE WHOLE LIST INTO MEMORY */
576 read_sublist(ifile,subheader,it->im); /* GUARANTEED TO BE <=div ITEMS */
577 it->n=subheader->len;
578 it->nii=1;
579 it->i_div=0; /* INDICATE THAT THERE ARE NO ADDITIONAL BLOCKS TO READ*/
582 it->i=find_overlap_start(start,end,it->im,it->n);
583 return it->i;
584 handle_malloc_failure:
585 return -2; /* SIGNAL THAT MEMORY ERROR OCCURRED */
589 int find_file_intervals(IntervalIterator *it0,int start,int end,
590 IntervalIndex ii[],int nii,
591 SublistHeader subheader[],int nlists,
592 SubheaderFile *subheader_file,
593 int ntop,int div,FILE *ifile,
594 IntervalMap buf[],int nbuf,
595 int *p_nreturn,IntervalIterator **it_return)
597 IntervalIterator *it=NULL,*it2=NULL;
598 int k,ibuf=0,ori_sign=1,ov=0;
599 if (!it0) { /* ALLOCATE AN ITERATOR IF NOT SUPPLIED*/
600 CALLOC(it,1,IntervalIterator);
602 else
603 it=it0;
605 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
606 if (start<0) { /* NEED TO CONVERT TO POSITIVE ORIENTATION */
607 k=start;
608 start= -end;
609 end= -k;
610 ori_sign = -1;
612 #endif
614 if (it->n == 0) /* DEFAULT: SEARCH THE TOP NESTED LIST */
615 if (find_file_start(it,start,end,-1,ii,nii,subheader,nlists,
616 subheader_file,ntop,div,ifile) == FIND_FILE_MALLOC_ERR)
617 goto handle_malloc_failure;
619 do { /* ITERATOR STACK LOOP */
620 while (it->i_div < it->nii) { /* BLOCK ITERATION LOOP */
621 while (it->i>=0 && it->i<it->n /* INDIVIDUAL INTERVAL ITERATION LOOP */
622 && HAS_OVERLAP_POSITIVE(it->im[it->i],start,end)) { /*OVERLAPS!*/
623 memcpy(buf+ibuf,it->im + it->i,sizeof(IntervalMap)); /*SAVE THIS HIT */
624 ibuf++;
625 k=it->im[it->i].sublist; /* GET SUBLIST OF i IF ANY */
626 it->i++; /* ADVANCE TO NEXT INTERVAL */
627 PUSH_ITERATOR_STACK(it,it2,IntervalIterator); /* RECURSE TO SUBLIST */
628 if (k>=0 && (ov=find_file_start(it2,start,end,k,ii,nii,subheader,nlists,
629 subheader_file,ntop,div,ifile))>=0)
630 it=it2; /* PUSH THE ITERATOR STACK */
631 if (FIND_FILE_MALLOC_ERR == ov)
632 goto handle_malloc_failure;
634 if (ibuf>=nbuf) /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
635 goto finally_return_result;
637 it->i_div++; /* TRY GOING TO NEXT BLOCK */
638 if (it->i == it->n /* USED WHOLE BLOCK, SO THERE MIGHT BE MORE */
639 && it->i_div < it->nii) { /* CONTINUE TO NEXT BLOCK */
640 it->n=read_imdiv(ifile,it->im,div,it->i_div,it->ntop); /*READ NEXT BLOCK*/
641 it->i=0; /* PROCESS IT FROM ITS START */
644 } while (POP_ITERATOR_STACK(it)); /* IF STACK EXHAUSTED, EXIT */
645 if (!it0) /* FREE THE ITERATOR WE CREATED. NO NEED TO RETURN IT TO USER */
646 free_interval_iterator(it);
647 it=NULL; /* ITERATOR IS EXHAUSTED */
649 finally_return_result:
650 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
651 reorient_intervals(ibuf,buf,ori_sign); /* REORIENT INTERVALS TO MATCH QUERY ORI */
652 #endif
653 *p_nreturn=ibuf; /* #INTERVALS FOUND IN THIS PASS */
654 *it_return=it; /* HAND BACK ITERATOR FOR CONTINUING THE SEARCH, IF ANY */
655 return 0; /* SIGNAL THAT NO ERROR OCCURRED */
656 handle_malloc_failure:
657 return -1;
665 /* FUNCTIONS FOR READING AND WRITING OF THE BINARY DATABASE FILES */
667 int write_padded_binary(IntervalMap im[],int n,int div,FILE *ifile)
669 int i,npad;
670 fwrite(im,sizeof(IntervalMap),n,ifile); /* SAVE THE ACTUAL DATA */
671 npad=n%div;
672 if (npad) {
673 npad=div-npad; /* #ITEMS NEEDED TO PAD TO AN EXACT MULTIPLE OF div */
674 for (i=0;i<npad;i++) /* GUARANTEED im HAS AT LEAST ONE RECORD */
675 fwrite(im,sizeof(IntervalMap),1,ifile); /*THIS IS JUST PADDING */
677 return n+npad; /* #RECORDS SAVED */
681 int repack_subheaders(IntervalMap im[],int n,int div,
682 SublistHeader *subheader,int nlists)
684 int i,j,*sub_map=NULL;
685 SublistHeader *sub_pack=NULL;
687 CALLOC(sub_map,nlists,int);
688 CALLOC(sub_pack,nlists,SublistHeader);
689 for (i=j=0;i<nlists;i++) { /* PLACE SUBLISTS W/ len>div AT FRONT */
690 if (subheader[i].len>div) {
691 memcpy(sub_pack+j,subheader+i,sizeof(SublistHeader));
692 sub_map[i]=j;
693 j++;
696 for (i=0;i<nlists;i++) { /* PLACE SUBLISTS W/ len<=div AFTERWARDS */
697 if (subheader[i].len<=div) {
698 memcpy(sub_pack+j,subheader+i,sizeof(SublistHeader));
699 sub_map[i]=j;
700 j++;
703 for (i=0;i<n;i++) /* ADJUST im[].sublist TO THE NEW LOCATIONS */
704 if (im[i].sublist>=0)
705 im[i].sublist=sub_map[im[i].sublist];
706 memcpy(subheader,sub_pack,nlists*sizeof(SublistHeader)); /* SAVE REORDERED LIST*/
708 FREE(sub_map);
709 FREE(sub_pack);
710 return 0;
711 handle_malloc_failure:
712 return -1;
716 int write_binary_index(IntervalMap im[],int n,int div,FILE *ifile)
718 int i,j,nsave=0;
719 for (i=0;i<n;i+=div) {
720 #ifdef MERGE_INTERVAL_ORIENTATIONS
721 if (im[i].start>=0) /* FORWARD ORI */
722 #endif
723 fwrite(&(im[i].start),sizeof(int),1,ifile); /*SAVE start */
724 #ifdef MERGE_INTERVAL_ORIENTATIONS
725 else { /* REVERSE ORI */
726 j= - im[i].end;
727 fwrite(&j,sizeof(int),1,ifile); /*SAVE start */
729 #endif
730 j=i+div-1;
731 if (j>=n)
732 j=n-1;
733 #ifdef MERGE_INTERVAL_ORIENTATIONS
734 if (im[j].start>=0) /* FORWARD ORI */
735 #endif
736 fwrite(&(im[j].end),sizeof(int),1,ifile); /*SAVE end */
737 #ifdef MERGE_INTERVAL_ORIENTATIONS
738 else { /* REVERSE ORI */
739 j= - im[j].start;
740 fwrite(&j,sizeof(int),1,ifile); /*SAVE end */
742 #endif
743 nsave++;
745 return nsave;
750 char *write_binary_files(IntervalMap im[],int n,int ntop,int div,
751 SublistHeader *subheader,int nlists,char filestem[])
753 int i,npad=0,nii;
754 char path[2048];
755 FILE *ifile=NULL,*ifile_subheader=NULL;
756 SublistHeader sh_tmp;
757 static char err_msg[1024];
759 if (nlists>0 /* REPACK SMALL SUBLISTS TO END */
760 && repack_subheaders(im,n,div,subheader,nlists)
761 == FIND_FILE_MALLOC_ERR) {
762 sprintf(err_msg,"unable to malloc %d subheaders",nlists);
763 return err_msg;
765 sprintf(path,"%s.subhead",filestem); /* SAVE THE SUBHEADER LIST */
766 ifile_subheader=fopen(path,"wb"); /* binary file */
767 if (!ifile_subheader) {
768 sprintf(err_msg,"unable to open file %s for writing",path);
769 return err_msg;
771 sprintf(path,"%s.idb",filestem); /* SAVE THE DATABASE */
772 ifile=fopen(path,"wb"); /* binary file */
773 if (!ifile) {
774 sprintf(err_msg,"unable to open file %s for writing",path);
775 return err_msg;
777 npad=write_padded_binary(im,ntop,div,ifile); /* WRITE THE TOP LEVEL LIST */
778 for (i=0;i<nlists;i++) {
779 sh_tmp.start=npad; /* FILE LOCATION WHERE THIS SUBLIST STORED */
780 sh_tmp.len=subheader[i].len; /* SAVE THE TRUE SUBLIST LENGTH, UNPADDED */
781 fwrite(&sh_tmp,sizeof(SublistHeader),1,ifile_subheader);
782 if (subheader[i].len>div) /* BIG LIST: PAD TO EXACT MULTIPLE OF div */
783 npad+=write_padded_binary(im+subheader[i].start,subheader[i].len,div,ifile);
784 else { /* SMALL LIST: SAVE W/O PADDING */
785 fwrite(im+subheader[i].start,sizeof(IntervalMap),subheader[i].len,ifile);
786 npad+=subheader[i].len;
789 fclose(ifile);
790 fclose(ifile_subheader);
792 sprintf(path,"%s.index",filestem); /* SAVE THE COMPACTED INDEX */
793 ifile=fopen(path,"wb"); /* binary file */
794 if (!ifile) {
795 sprintf(err_msg,"unable to open file %s for writing",path);
796 return err_msg;
798 nii=write_binary_index(im,ntop,div,ifile);
799 for (i=0;i<nlists;i++) /* ALSO STORE INDEX DATA FOR BIG SUBLISTS */
800 if (subheader[i].len>div)
801 nii+=write_binary_index(im+subheader[i].start,subheader[i].len,div,ifile);
802 fclose(ifile);
804 sprintf(path,"%s.size",filestem); /* SAVE BASIC SIZE INFO*/
805 ifile=fopen(path,"w"); /* text file */
806 if (!ifile) {
807 sprintf(err_msg,"unable to open file %s for writing",path);
808 return err_msg;
810 fprintf(ifile,"%d %d %d %d %d\n",n,ntop,div,nlists,nii);
811 fclose(ifile);
813 return NULL; /* RETURN CODE SIGNALS SUCCESS!! */
818 IntervalDBFile *read_binary_files(char filestem[],char err_msg[],
819 int subheader_nblock)
821 int n,ntop,div,nlists,nii;
822 char path[2048];
823 IntervalIndex *ii=NULL;
824 SublistHeader *subheader=NULL;
825 IntervalDBFile *idb_file=NULL;
826 FILE *ifile=NULL;
828 sprintf(path,"%s.size",filestem); /* READ BASIC SIZE INFO*/
829 ifile=fopen(path,"r"); /* text file */
830 if (!ifile) {
831 if (err_msg)
832 sprintf(err_msg,"unable to open file %s",path);
833 return NULL;
835 fscanf(ifile,"%d %d %d %d %d",&n,&ntop,&div,&nlists,&nii);
836 fclose(ifile);
838 CALLOC(ii,nii+1,IntervalIndex);
839 if (nii>0) {
840 sprintf(path,"%s.index",filestem); /* READ THE COMPACTED INDEX */
841 ifile=fopen(path,"rb"); /* binary file */
842 if (!ifile) {
843 if (err_msg)
844 sprintf(err_msg,"unable to open file %s",path);
845 return NULL;
847 fread(ii,sizeof(IntervalIndex),nii,ifile);
848 fclose(ifile);
851 CALLOC(idb_file,1,IntervalDBFile);
852 if(nlists>0){
853 sprintf(path,"%s.subhead",filestem); /* SAVE THE SUBHEADER LIST */
854 ifile=fopen(path,"rb"); /* binary file */
855 if (!ifile) {
856 if (err_msg)
857 sprintf(err_msg,"unable to open file %s",path);
858 return NULL;
860 #ifdef ON_DEMAND_SUBLIST_HEADER
861 CALLOC(subheader,subheader_nblock,SublistHeader);
862 idb_file->subheader_file.subheader=subheader;
863 idb_file->subheader_file.nblock=subheader_nblock;
864 idb_file->subheader_file.start = -subheader_nblock; /* NO BLOCK LOADED */
865 idb_file->subheader_file.ifile=ifile;
866 #else
867 CALLOC(subheader,nlists,SublistHeader); /* LOAD THE ENTIRE SUBHEADER */
868 fread(subheader,sizeof(SublistHeader),nlists,ifile); /*SAVE LIST */
869 fclose(ifile);
870 #endif
873 idb_file->n=n;
874 idb_file->ntop=ntop;
875 idb_file->nlists=nlists;
876 idb_file->div=div;
877 idb_file->nii=ntop/div;
878 if (ntop%div) /* INDEX IS PADDED TO EXACT MULTIPLE OF div */
879 idb_file->nii++; /* ONE EXTRA ENTRY FOR PARTIAL BLOCK */
880 idb_file->ii=ii;
881 idb_file->subheader=subheader;
882 sprintf(path,"%s.idb",filestem); /* OPEN THE DATABASE */
883 idb_file->ifile_idb=fopen(path,"rb"); /* binary file */
884 if (!idb_file->ifile_idb) {
885 if (err_msg)
886 sprintf(err_msg,"unable to open file %s",path);
887 free(idb_file);
888 return NULL;
890 return idb_file;
891 handle_malloc_failure:
892 FREE(ii); /* DUMP OUR MEMORY */
893 FREE(subheader);
894 FREE(idb_file);
895 return NULL;
900 int free_interval_dbfile(IntervalDBFile *db_file)
902 if (db_file->ifile_idb)
903 fclose(db_file->ifile_idb);
904 #ifdef ON_DEMAND_SUBLIST_HEADER
905 if (db_file->subheader_file.ifile)
906 fclose(db_file->subheader_file.ifile);
907 #endif
908 FREE(db_file->ii);
909 FREE(db_file->subheader);
910 free(db_file);
911 return 0;
917 int save_text_file(char filestem[],char basestem[],
918 char err_msg[],FILE *ofile)
920 int i,n,ntop,div,nlists,nii,npad;
921 char path[2048];
922 IntervalMap im;
923 IntervalIndex ii;
924 SublistHeader subheader;
925 FILE *ifile=NULL;
927 sprintf(path,"%s.size",filestem); /* READ BASIC SIZE INFO*/
928 ifile=fopen(path,"r"); /* text file */
929 if (!ifile)
930 goto unable_to_open_file;
931 if (5!=fscanf(ifile,"%d %d %d %d %d",&n,&ntop,&div,&nlists,&nii))
932 goto fread_error_occurred;
933 fclose(ifile);
934 npad=ntop%div;
935 if (npad>0) /* PAD TO AN EXACT MULTIPLE OF div */
936 npad=ntop+(div-npad);
937 else /* AN EXACT MULTIPLE OF div, SO NO PADDING */
938 npad=ntop;
940 if (fprintf(ofile,"SIZE\t%s\t%d %d %d %d %d\n",
941 basestem,n,ntop,div,nlists,nii)<0)
942 goto write_error_occurred;
944 if (nii>0) {
945 sprintf(path,"%s.index",filestem); /* READ THE COMPACTED INDEX */
946 ifile=fopen(path,"rb"); /* binary file */
947 if (!ifile)
948 goto unable_to_open_file;
949 for (i=0;i<nii;i++) {
950 if (1!=fread(&ii,sizeof(IntervalIndex),1,ifile))
951 goto fread_error_occurred;
952 if (fprintf(ofile,"I %d %d\n",ii.start,ii.end)<0)
953 goto write_error_occurred;
955 fclose(ifile);
958 if(nlists>0){
959 sprintf(path,"%s.subhead",filestem); /* READ THE SUBHEADER LIST */
960 ifile=fopen(path,"rb"); /* binary file */
961 if (!ifile)
962 goto unable_to_open_file;
963 for (i=0;i<nlists;i++) {
964 if (1!=fread(&subheader,sizeof(SublistHeader),1,ifile))
965 goto fread_error_occurred;
966 if (fprintf(ofile,"S %d %d\n",subheader.start,subheader.len)<0)
967 goto write_error_occurred;
968 npad=subheader.start+subheader.len;
970 fclose(ifile);
973 if (npad>0) {
974 sprintf(path,"%s.idb",filestem); /* READ THE DATABASE */
975 ifile=fopen(path,"rb"); /* binary file */
976 if (!ifile)
977 goto unable_to_open_file;
978 for (i=0;i<npad;i++) {
979 if (1!=fread(&im,sizeof(IntervalMap),1,ifile))
980 goto fread_error_occurred;
981 if (fprintf(ofile,"M %d %d %d %d %d %d\n",im.start,im.end,
982 im.target_id,im.target_start,
983 im.target_end,im.sublist)<0)
984 goto write_error_occurred;
986 fclose(ifile);
988 return 0; /* INDICATES NO ERROR OCCURRED */
989 unable_to_open_file:
990 if (err_msg)
991 sprintf(err_msg,"unable to open file %s",path);
992 return -1;
993 fread_error_occurred:
994 if (err_msg)
995 sprintf(err_msg,"error or EOF reading file %s",path);
996 return -1;
997 write_error_occurred:
998 if (err_msg)
999 sprintf(err_msg,"error writing output file! out of disk space?");
1000 return -1;
1005 int text_file_to_binaries(FILE *infile,char buildpath[],char err_msg[])
1007 int i,n,ntop,div,nlists,nii,npad;
1008 char path[2048],line[32768],filestem[2048];
1009 IntervalMap im;
1010 IntervalIndex ii;
1011 SublistHeader subheader;
1012 FILE *ifile=NULL;
1014 if (NULL==fgets(line,32767,infile))
1015 goto fread_error_occurred;
1016 if (6!=sscanf(line,"SIZE\t%s\t%d %d %d %d %d",
1017 filestem,&n,&ntop,&div,&nlists,&nii))
1018 goto fread_error_occurred;
1019 sprintf(path,"%s%s.size",buildpath,filestem); /* SAVE BASIC SIZE INFO*/
1020 ifile=fopen(path,"w"); /* text file */
1021 if (!ifile)
1022 goto unable_to_open_file;
1023 if (fprintf(ifile,"%d %d %d %d %d\n",n,ntop,div,nlists,nii)<0)
1024 goto write_error_occurred;
1025 fclose(ifile);
1026 npad=ntop%div;
1027 if (npad>0) /* PAD TO AN EXACT MULTIPLE OF div */
1028 npad=ntop+(div-npad);
1029 else /* AN EXACT MULTIPLE OF div, SO NO PADDING */
1030 npad=ntop;
1032 if (nii>0) {
1033 sprintf(path,"%s%s.index",buildpath,filestem); /* SAVE INDEX INFO*/
1034 ifile=fopen(path,"wb"); /* binary file */
1035 if (!ifile)
1036 goto unable_to_open_file;
1037 for (i=0;i<nii;i++) {
1038 if (NULL==fgets(line,32767,infile))
1039 goto fread_error_occurred;
1040 if (2!=sscanf(line,"I %d %d",&(ii.start),&(ii.end)))
1041 goto fread_error_occurred;
1042 if (1!=fwrite(&ii,sizeof(IntervalIndex),1,ifile))
1043 goto write_error_occurred;
1045 fclose(ifile);
1048 if(nlists>0){
1049 sprintf(path,"%s%s.subhead",buildpath,filestem); /* SAVE THE SUBHEADER LIST */
1050 ifile=fopen(path,"wb"); /* binary file */
1051 if (!ifile)
1052 goto unable_to_open_file;
1053 for (i=0;i<nlists;i++) {
1054 if (NULL==fgets(line,32767,infile))
1055 goto fread_error_occurred;
1056 if (2!=sscanf(line,"S %d %d",&(subheader.start),&(subheader.len)))
1057 goto fread_error_occurred;
1058 if (1!=fwrite(&subheader,sizeof(SublistHeader),1,ifile))
1059 goto write_error_occurred;
1060 npad=subheader.start+subheader.len;
1062 fclose(ifile);
1065 sprintf(path,"%s%s.idb",buildpath,filestem); /* SAVE THE ACTUAL INTERVAL DB*/
1066 ifile=fopen(path,"wb"); /* binary file */
1067 if (!ifile)
1068 goto unable_to_open_file;
1069 for (i=0;i<npad;i++) {
1070 if (NULL==fgets(line,32767,infile))
1071 goto fread_error_occurred;
1072 if (6!=sscanf(line,"M %d %d %d %d %d %d",&(im.start),&(im.end),
1073 &(im.target_id),&(im.target_start),
1074 &(im.target_end),&(im.sublist)))
1075 goto fread_error_occurred;
1076 if (1!=fwrite(&im,sizeof(IntervalMap),1,ifile))
1077 goto write_error_occurred;
1079 fclose(ifile);
1081 return 0; /* INDICATES NO ERROR OCCURRED */
1082 unable_to_open_file:
1083 if (err_msg)
1084 sprintf(err_msg,"unable to open file %s",path);
1085 return -1;
1086 fread_error_occurred:
1087 if (err_msg)
1088 sprintf(err_msg,"error or EOF reading input file");
1089 return -1;
1090 write_error_occurred:
1091 if (err_msg)
1092 sprintf(err_msg,"error writing file %s! out of disk space?",
1093 path);
1094 return -1;