2 #include "intervaldb.h"
4 int C_int_max
=INT_MAX
; /* KLUDGE TO LET PYREX CODE ACCESS VALUE OF INT_MAX MACRO */
6 IntervalMap
*read_intervals(int n
,FILE *ifile
)
10 CALLOC(im
,n
,IntervalMap
); /* ALLOCATE THE WHOLE ARRAY */
11 while (i
<n
&& fscanf(ifile
," %d %d %d %d %d",&im
[i
].start
,&im
[i
].end
,
12 &im
[i
].target_id
,&im
[i
].target_start
,
13 &im
[i
].target_end
)==5) {
14 im
[i
].sublist
= -1; /* DEFAULT: NO SUBLIST */
18 fprintf(stderr
,"WARNING: number of records read %d does not match allocation %d\n",i
,n
);
21 handle_malloc_failure
:
22 return NULL
; /* NO CLEANUP ACTIONS REQUIRED */
30 int imstart_qsort_cmp(const void *void_a
,const void *void_b
)
31 { /* STRAIGHTFORWARD COMPARISON OF SIGNED start VALUES, LONGER INTERVALS 1ST */
32 IntervalMap
*a
=(IntervalMap
*)void_a
,*b
=(IntervalMap
*)void_b
;
33 if (a
->start
<b
->start
)
35 else if (a
->start
>b
->start
)
37 else if (a
->end
>b
->end
) /* SAME START: PUT LONGER INTERVAL 1ST */
39 else if (a
->end
<b
->end
) /* CONTAINED INTERVAL SHOULD FOLLOW LARGER INTERVAL*/
48 #ifdef MERGE_INTERVAL_ORIENTATIONS
49 int im_qsort_cmp(const void *void_a
,const void *void_b
)
50 { /* MERGE FORWARD AND REVERSE INTERVALS AS IF THEY WERE ALL IN FORWARD ORI */
51 int a_start
,a_end
,b_start
,b_end
;
52 IntervalMap
*a
=(IntervalMap
*)void_a
,*b
=(IntervalMap
*)void_b
;
53 SET_INTERVAL_POSITIVE(*a
,a_start
,a_end
);
54 SET_INTERVAL_POSITIVE(*b
,b_start
,b_end
);
57 else if (a_start
>b_start
)
59 else if (a_end
>b_end
) /* SAME START: PUT LONGER INTERVAL 1ST */
61 else if (a_end
<b_end
) /* CONTAINED INTERVAL SHOULD FOLLOW LARGER INTERVAL*/
68 int sublist_qsort_cmp(const void *void_a
,const void *void_b
)
69 { /* SORT IN SUBLIST ORDER, SECONDARILY BY start */
70 IntervalMap
*a
=(IntervalMap
*)void_a
,*b
=(IntervalMap
*)void_b
;
71 if (a
->sublist
<b
->sublist
)
73 else if (a
->sublist
>b
->sublist
)
75 else if (START_POSITIVE(*a
) < START_POSITIVE(*b
))
77 else if (START_POSITIVE(*a
) > START_POSITIVE(*b
))
85 int target_qsort_cmp(const void *void_a
,const void *void_b
)
86 { /* SORT IN target_id ORDER, SECONDARILY BY target_start */
87 IntervalMap
*a
=(IntervalMap
*)void_a
,*b
=(IntervalMap
*)void_b
;
88 if (a
->target_id
<b
->target_id
)
90 else if (a
->target_id
>b
->target_id
)
92 else if (a
->target_start
< b
->target_start
)
94 else if (a
->target_start
> b
->target_start
)
101 SublistHeader
*build_nested_list_inplace(IntervalMap im
[],int n
,
102 int *p_n
,int *p_nlists
)
104 int i
=0,parent
,nlists
=1,isublist
=0,total
=0,temp
=0;
105 SublistHeader
*subheader
=NULL
;
107 #ifdef ALL_POSITIVE_ORIENTATION
108 reorient_intervals(n
,im
,1); /* FORCE ALL INTERVALS INTO POSITIVE ORI */
110 #ifdef MERGE_INTERVAL_ORIENTATIONS
111 qsort(im
,n
,sizeof(IntervalMap
),im_qsort_cmp
); /* SORT BY start, CONTAINMENT */
113 qsort(im
,n
,sizeof(IntervalMap
),imstart_qsort_cmp
); /* SORT BY start, CONTAINMENT */
117 if(!(END_POSITIVE(im
[i
])>END_POSITIVE(im
[i
-1]) /* i NOT CONTAINED */
118 || (END_POSITIVE(im
[i
])==END_POSITIVE(im
[i
-1]) /* SAME INTERVAL! */
119 && START_POSITIVE(im
[i
])==START_POSITIVE(im
[i
-1])))){
121 /* printf("%d (%d,%d) -> (%d,%d) %d\n", nlists, im[i-1].start, */
122 /* im[i-1].end, im[i].start,im[i].end,i); */
126 /* printf("%d lists?!\n", nlists); */
131 CALLOC(subheader
,1,SublistHeader
); /* RETURN A DUMMY ARRAY, SINCE NULL RETURN IS ERROR CODE */
135 CALLOC(subheader
,nlists
+1,SublistHeader
); /* SUBLIST HEADER INDEX */
138 subheader
[0].start
= -1;
144 if(isublist
&& (END_POSITIVE(im
[i
])>END_POSITIVE(im
[parent
]) /* i NOT CONTAINED */
145 || (END_POSITIVE(im
[i
])==END_POSITIVE(im
[parent
]) /* SAME INTERVAL! */
146 && START_POSITIVE(im
[i
])==START_POSITIVE(im
[parent
])))){
147 subheader
[isublist
].start
=subheader
[im
[parent
].sublist
].len
-1; /* RECORD PARENT RELATIVE POSITION */
148 isublist
=im
[parent
].sublist
;
149 parent
=subheader
[im
[parent
].sublist
].start
;
152 if(subheader
[isublist
].len
==0){
155 subheader
[isublist
].len
++;
156 im
[i
].sublist
=isublist
;
159 subheader
[isublist
].start
=parent
;
164 while(isublist
>0){ /* pop remaining stack */
165 subheader
[isublist
].start
=subheader
[im
[parent
].sublist
].len
-1; /* RECORD PARENT RELATIVE POSITION */
166 isublist
=im
[parent
].sublist
;
167 parent
=subheader
[im
[parent
].sublist
].start
;
170 *p_n
=subheader
[0].len
;
173 for(i
=0;i
<nlists
+1;++i
){
174 temp
=subheader
[i
].len
;
175 subheader
[i
].len
=total
;
179 /* SUBHEADER.LEN IS NOW START OF THE SUBLIST */
182 if(im
[i
].sublist
>im
[i
-1].sublist
){
183 subheader
[im
[i
].sublist
].start
+=subheader
[im
[i
-1].sublist
].len
;
187 /* SUBHEADER.START IS NOW ABS POSITION OF PARENT */
189 qsort(im
,n
,sizeof(IntervalMap
),sublist_qsort_cmp
);
190 /* AT THIS POINT SUBLISTS ARE GROUPED TOGETHER, READY TO PACK */
193 subheader
[0].start
=0;
196 if(im
[i
].sublist
>isublist
){
197 /* printf("Entering sublist %d (%d,%d)\n", im[i].sublist, im[i].start,im[i].end); */
198 isublist
=im
[i
].sublist
;
199 parent
=subheader
[isublist
].start
;
200 /* printf("Parent (%d,%d) is at %d, list start is at %d\n", */
201 /* im[parent].start, im[parent].end, subheader[isublist].start,i); */
202 im
[parent
].sublist
=isublist
-1;
203 subheader
[isublist
].len
=0;
204 subheader
[isublist
].start
=i
;
206 subheader
[isublist
].len
++;
211 memmove(subheader
,subheader
+1,nlists
*sizeof(SublistHeader
));
214 handle_malloc_failure
:
215 /* FREE ANY MALLOCS WE PERFORMED*/
222 SublistHeader
*build_nested_list(IntervalMap im
[],int n
,
223 int *p_n
,int *p_nlists
)
225 int i
=0,j
,k
,parent
,nsub
=0,nlists
=0;
226 IntervalMap
*imsub
=NULL
;
227 SublistHeader
*subheader
=NULL
;
229 #ifdef ALL_POSITIVE_ORIENTATION
230 reorient_intervals(n
,im
,1); /* FORCE ALL INTERVALS INTO POSITIVE ORI */
232 #ifdef MERGE_INTERVAL_ORIENTATIONS
233 qsort(im
,n
,sizeof(IntervalMap
),im_qsort_cmp
); /* SORT BY start, CONTAINMENT */
235 qsort(im
,n
,sizeof(IntervalMap
),imstart_qsort_cmp
); /* SORT BY start, CONTAINMENT */
237 while (i
<n
) { /* TOP LEVEL LIST SCAN */
240 while (i
<n
&& parent
>=0) { /* RECURSIVE ALGORITHM OF ALEX ALEKSEYENKO */
241 if (END_POSITIVE(im
[i
])>END_POSITIVE(im
[parent
]) /* i NOT CONTAINED */
242 || (END_POSITIVE(im
[i
])==END_POSITIVE(im
[parent
]) /* SAME INTERVAL! */
243 && START_POSITIVE(im
[i
])==START_POSITIVE(im
[parent
])))
244 parent
=im
[parent
].sublist
; /* POP RECURSIVE STACK*/
245 else { /* i CONTAINED IN parent*/
246 im
[i
].sublist
=parent
; /* MARK AS CONTAINED IN parent */
247 nsub
++; /* COUNT TOTAL #SUBLIST ENTRIES */
248 parent
=i
; /* AND PUSH ONTO RECURSIVE STACK */
249 i
++; /* ADVANCE TO NEXT INTERVAL */
252 } /* AT THIS POINT sublist IS EITHER -1 IF NOT IN SUBLIST, OR INDICATES parent*/
254 if (nsub
>0) { /* WE HAVE SUBLISTS TO PROCESS */
255 CALLOC(imsub
,nsub
,IntervalMap
); /* TEMPORARY ARRAY FOR REPACKING SUBLISTS */
256 for (i
=j
=0;i
<n
;i
++) { /* GENERATE LIST FOR SORTING; ASSIGN HEADER INDEXES*/
257 parent
=im
[i
].sublist
;
258 if (parent
>=0) {/* IN A SUBLIST */
260 imsub
[j
].sublist
=parent
;
262 if (im
[parent
].sublist
<0) /* A NEW PARENT! SET HIS SUBLIST HEADER INDEX */
263 im
[parent
].sublist
=nlists
++;
265 im
[i
].sublist
= -1; /* RESET TO DEFAULT VALUE: NO SUBLIST */
267 qsort(imsub
,nsub
,sizeof(IntervalMap
),sublist_qsort_cmp
);
268 /* AT THIS POINT SUBLISTS ARE GROUPED TOGETHER, READY TO PACK */
270 CALLOC(subheader
,nlists
,SublistHeader
); /* SUBLIST HEADER INDEX */
271 for (i
=0;i
<nsub
;i
++) { /* COPY SUBLIST ENTRIES TO imsub */
273 parent
=imsub
[i
].sublist
;
274 memcpy(imsub
+i
,im
+j
,sizeof(IntervalMap
)); /* COPY INTERVAL */
275 k
=im
[parent
].sublist
;
276 if (subheader
[k
].len
==0) /* START A NEW SUBLIST */
277 subheader
[k
].start
=i
;
278 subheader
[k
].len
++; /* COUNT THE SUBLIST ENTRIES */
279 im
[j
].start
=im
[j
].end
= -1; /* MARK FOR DELETION */
280 } /* DONE COPYING ALL SUBLISTS TO imsub */
282 for (i
=j
=0;i
<n
;i
++) /* COMPRESS THE LIST TO REMOVE SUBLISTS */
283 if (im
[i
].start
!= -1 || im
[i
].end
!= -1) { /* NOT IN A SUBLIST, SO KEEP */
284 if (j
<i
) /* COPY TO NEW COMPACTED LOCATION */
285 memcpy(im
+j
,im
+i
,sizeof(IntervalMap
));
289 memcpy(im
+j
,imsub
,nsub
*sizeof(IntervalMap
)); /* COPY THE SUBLISTS */
290 for (i
=0;i
<nlists
;i
++) /* ADJUST start ADDRESSES FOR SHIFT*/
291 subheader
[i
].start
+= j
;
293 *p_n
= j
; /* COPY THE COMPRESSED LIST SIZES BACK TO CALLER*/
295 else { /* NO SUBLISTS: HANDLE THIS CASE CAREFULLY */
297 CALLOC(subheader
,1,SublistHeader
); /* RETURN A DUMMY ARRAY, SINCE NULL RETURN IS ERROR CODE */
299 *p_nlists
=nlists
; /* RETURN COUNT OF NUMBER OF SUBLISTS */
301 handle_malloc_failure
:
302 FREE(imsub
); /* FREE ANY MALLOCS WE PERFORMED*/
308 IntervalMap
*interval_map_alloc(int n
)
310 IntervalMap
*im
=NULL
;
311 CALLOC(im
,n
,IntervalMap
);
313 handle_malloc_failure
:
319 int find_overlap_start(int start
,int end
,IntervalMap im
[],int n
)
326 if (END_POSITIVE(im
[mid
])<=start
)
331 if (l
<n
&& HAS_OVERLAP_POSITIVE(im
[l
],start
,end
))
332 return l
; /* l IS START OF OVERLAP */
334 return -1; /* NO OVERLAP FOUND */
340 int find_index_start(int start
,int end
,IntervalIndex im
[],int n
)
347 if (END_POSITIVE(im
[mid
])<=start
)
352 return l
; /* l IS START OF POSSIBLE OVERLAP */
357 int find_suboverlap_start(int start
,int end
,int isub
,IntervalMap im
[],
358 SublistHeader subheader
[],int nlists
)
363 i
=find_overlap_start(start
,end
,im
+subheader
[isub
].start
,subheader
[isub
].len
);
365 return i
+subheader
[isub
].start
;
371 IntervalIterator
*interval_iterator_alloc(void)
373 IntervalIterator
*it
=NULL
;
374 CALLOC(it
,1,IntervalIterator
);
376 handle_malloc_failure
:
380 int free_interval_iterator(IntervalIterator
*it
)
382 IntervalIterator
*it2
,*it_next
;
385 FREE_ITERATOR_STACK(it
,it2
,it_next
);
390 IntervalIterator
*reset_interval_iterator(IntervalIterator
*it
)
392 ITERATOR_STACK_TOP(it
);
398 void reorient_intervals(int n
,IntervalMap im
[],int ori_sign
)
402 if ((im
[i
].start
>=0 ? 1:-1)!=ori_sign
) { /* ORIENTATION MISMATCH */
403 tmp
=im
[i
].start
; /* SO REVERSE THIS INTERVAL MAPPING */
404 im
[i
].start
= -im
[i
].end
;
406 tmp
=im
[i
].target_start
;
407 im
[i
].target_start
= -im
[i
].target_end
;
408 im
[i
].target_end
= -tmp
;
413 int find_intervals(IntervalIterator
*it0
,int start
,int end
,
414 IntervalMap im
[],int n
,
415 SublistHeader subheader
[],int nlists
,
416 IntervalMap buf
[],int nbuf
,
417 int *p_nreturn
,IntervalIterator
**it_return
)
419 IntervalIterator
*it
=NULL
,*it2
=NULL
;
420 int ibuf
=0,j
,k
,ori_sign
=1;
421 if (!it0
) { /* ALLOCATE AN ITERATOR IF NOT SUPPLIED*/
422 CALLOC(it
,1,IntervalIterator
);
427 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
428 if (start
<0) { /* NEED TO CONVERT TO POSITIVE ORIENTATION */
435 if (it
->n
== 0) { /* DEFAULT: SEARCH THE TOP NESTED LIST */
437 it
->i
=find_overlap_start(start
,end
,im
,n
);
440 while (it
->i
>=0 && it
->i
<it
->n
&& HAS_OVERLAP_POSITIVE(im
[it
->i
],start
,end
)) {
441 memcpy(buf
+ibuf
,im
+ it
->i
,sizeof(IntervalMap
)); /*SAVE THIS HIT TO BUFFER */
443 k
=im
[it
->i
].sublist
; /* GET SUBLIST OF i IF ANY */
444 it
->i
++; /* ADVANCE TO NEXT INTERVAL */
445 if (k
>=0 && (j
=find_suboverlap_start(start
,end
,k
,im
,subheader
,nlists
))>=0) {
446 PUSH_ITERATOR_STACK(it
,it2
,IntervalIterator
); /* RECURSE TO SUBLIST */
447 it2
->i
= j
; /* START OF OVERLAPPING HITS IN THIS SUBLIST */
448 it2
->n
= subheader
[k
].start
+subheader
[k
].len
; /* END OF SUBLIST */
449 it
=it2
; /* PUSH THE ITERATOR STACK */
451 if (ibuf
>=nbuf
) /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
452 goto finally_return_result
;
454 } while (POP_ITERATOR_STACK(it
)); /* IF STACK EXHAUSTED, EXIT */
455 if (!it0
) /* FREE THE ITERATOR WE CREATED. NO NEED TO RETURN IT TO USER */
456 free_interval_iterator(it
);
457 it
=NULL
; /* ITERATOR IS EXHAUSTED */
459 finally_return_result
:
460 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
461 reorient_intervals(ibuf
,buf
,ori_sign
); /* REORIENT INTERVALS TO MATCH QUERY ORI */
463 *p_nreturn
=ibuf
; /* #INTERVALS FOUND IN THIS PASS */
464 *it_return
=it
; /* HAND BACK ITERATOR FOR CONTINUING THE SEARCH, IF ANY */
465 return 0; /* SIGNAL THAT NO ERROR OCCURRED */
466 handle_malloc_failure
:
474 /****************************************************************
476 * FILE-BASED SEARCH FUNCTIONS
480 /* READ A BLOCK FROM THE DATABASE FILE */
481 int read_imdiv(FILE *ifile
,IntervalMap imdiv
[],int div
,int i_div
,int ntop
)
485 ipos
=div
*i_div
; /* CALCULATE POSITION IN RECORDS */
486 if (ipos
+div
<=ntop
) /* GET A WHOLE BLOCK */
488 else /* JUST READ PARTIAL BLOCK AT END */
490 ipos
*= sizeof(IntervalMap
); /* CALCULATE FILE POSITION IN BYTES */
491 PYGR_FSEEK(ifile
,ipos
,SEEK_SET
);
492 fread(imdiv
,sizeof(IntervalMap
),block
,ifile
);
497 /* READ A SUBLIST FROM DATABASE FILE */
498 IntervalMap
*read_sublist(FILE *ifile
,SublistHeader
*subheader
,
503 CALLOC(im
,subheader
->len
,IntervalMap
);
505 ipos
=subheader
->start
; /* CALCULATE POSITION IN RECORDS */
506 ipos
*=sizeof(IntervalMap
); /* CALCULATE FILE POSITION IN BYTES */
507 PYGR_FSEEK(ifile
,ipos
,SEEK_SET
);
508 fread(im
,sizeof(IntervalMap
),subheader
->len
,ifile
);
510 handle_malloc_failure
:
515 /* READ A BLOCK OF THE SUBLIST HEADER FILE */
516 int read_subheader_block(SublistHeader subheader
[],int isub
,int nblock
,
517 int nsubheader
,FILE *ifile
)
521 start
=isub
-(isub
%nblock
); /* GET BLOCK START */
522 if (start
+nblock
>nsubheader
)
523 nblock
=nsubheader
-start
; /* TRUNCATE TO FIT MAX FILE LENGTH */
524 ipos
=start
; /* CONVERT TO off_t TYPE */
525 ipos
*= sizeof(SublistHeader
); /* CALCULATE ACTUAL BYTE OFFSET */
526 PYGR_FSEEK(ifile
,ipos
,SEEK_SET
);
527 fread(subheader
,sizeof(SublistHeader
),nblock
,ifile
);
534 int find_file_start(IntervalIterator
*it
,int start
,int end
,int isub
,
535 IntervalIndex ii
[],int nii
,
536 SublistHeader
*subheader
,int nlists
,
537 SubheaderFile
*subheader_file
,
538 int ntop
,int div
,FILE *ifile
)
540 int i_div
= -1,offset
=0,offset_div
=0;
541 if (isub
<0) /* TOP-LEVEL SEARCH: USE THE INDEX */
542 i_div
=find_index_start(start
,end
,ii
,nii
);
543 else { /* GET PTR TO subheader[isub] */
544 #ifdef ON_DEMAND_SUBLIST_HEADER
545 if (isub
<subheader_file
->start
/* isub OUTSIDE OUR CURRENT BLOCK */
546 || isub
>=subheader_file
->start
+subheader_file
->nblock
)
547 subheader_file
->start
= /* LOAD NEW BLOCK FROM DISK */
548 read_subheader_block(subheader_file
->subheader
,isub
,
549 subheader_file
->nblock
,nlists
,
550 subheader_file
->ifile
);
551 subheader
=subheader_file
->subheader
+ (isub
-subheader_file
->start
);
553 subheader
+= isub
; /* POINT TO OUR SUBHEADER */
555 if (subheader
->len
>div
) { /* BIG SUBLIST, SO USE THE INDEX */
556 offset
=subheader
->start
;
557 offset_div
=offset
/div
;/* offset GUARANTEED TO BE MULTIPLE OF div */
559 nii
=ntop
/div
; /* CALCULATE SUBLIST INDEX SIZE */
560 if (ntop
%div
) /* ONE EXTRA ENTRY FOR PARTIAL BLOCK */
562 i_div
=find_index_start(start
,end
,ii
+offset_div
,nii
);
566 if (!it
->im
) { /* NO ALLOCATION? ALLOCATE OUR BLOCK SIZE div */
567 CALLOC(it
->im
,div
,IntervalMap
); /* ALWAYS ALLOCATE div BUFFERSIZE */
569 if (i_div
>=0) { /* READ A SPECIFIC BLOCK OF SIZE div */
570 it
->n
=read_imdiv(ifile
,it
->im
,div
,i_div
+offset_div
,ntop
+offset
);
571 it
->ntop
=ntop
+offset
; /* END OF THIS LIST IN THE BINARY FILE */
572 it
->nii
=nii
+offset_div
; /* SAVE INFORMATION FOR READING SUBSEQUENT BLOCKS */
573 it
->i_div
=i_div
+offset_div
; /* INDEX OF THIS BLOCK IN THE BINARY FILE */
575 else { /* A SMALL SUBLIST: READ THE WHOLE LIST INTO MEMORY */
576 read_sublist(ifile
,subheader
,it
->im
); /* GUARANTEED TO BE <=div ITEMS */
577 it
->n
=subheader
->len
;
579 it
->i_div
=0; /* INDICATE THAT THERE ARE NO ADDITIONAL BLOCKS TO READ*/
582 it
->i
=find_overlap_start(start
,end
,it
->im
,it
->n
);
584 handle_malloc_failure
:
585 return -2; /* SIGNAL THAT MEMORY ERROR OCCURRED */
589 int find_file_intervals(IntervalIterator
*it0
,int start
,int end
,
590 IntervalIndex ii
[],int nii
,
591 SublistHeader subheader
[],int nlists
,
592 SubheaderFile
*subheader_file
,
593 int ntop
,int div
,FILE *ifile
,
594 IntervalMap buf
[],int nbuf
,
595 int *p_nreturn
,IntervalIterator
**it_return
)
597 IntervalIterator
*it
=NULL
,*it2
=NULL
;
598 int k
,ibuf
=0,ori_sign
=1,ov
=0;
599 if (!it0
) { /* ALLOCATE AN ITERATOR IF NOT SUPPLIED*/
600 CALLOC(it
,1,IntervalIterator
);
605 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
606 if (start
<0) { /* NEED TO CONVERT TO POSITIVE ORIENTATION */
614 if (it
->n
== 0) /* DEFAULT: SEARCH THE TOP NESTED LIST */
615 if (find_file_start(it
,start
,end
,-1,ii
,nii
,subheader
,nlists
,
616 subheader_file
,ntop
,div
,ifile
) == FIND_FILE_MALLOC_ERR
)
617 goto handle_malloc_failure
;
619 do { /* ITERATOR STACK LOOP */
620 while (it
->i_div
< it
->nii
) { /* BLOCK ITERATION LOOP */
621 while (it
->i
>=0 && it
->i
<it
->n
/* INDIVIDUAL INTERVAL ITERATION LOOP */
622 && HAS_OVERLAP_POSITIVE(it
->im
[it
->i
],start
,end
)) { /*OVERLAPS!*/
623 memcpy(buf
+ibuf
,it
->im
+ it
->i
,sizeof(IntervalMap
)); /*SAVE THIS HIT */
625 k
=it
->im
[it
->i
].sublist
; /* GET SUBLIST OF i IF ANY */
626 it
->i
++; /* ADVANCE TO NEXT INTERVAL */
627 PUSH_ITERATOR_STACK(it
,it2
,IntervalIterator
); /* RECURSE TO SUBLIST */
628 if (k
>=0 && (ov
=find_file_start(it2
,start
,end
,k
,ii
,nii
,subheader
,nlists
,
629 subheader_file
,ntop
,div
,ifile
))>=0)
630 it
=it2
; /* PUSH THE ITERATOR STACK */
631 if (FIND_FILE_MALLOC_ERR
== ov
)
632 goto handle_malloc_failure
;
634 if (ibuf
>=nbuf
) /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
635 goto finally_return_result
;
637 it
->i_div
++; /* TRY GOING TO NEXT BLOCK */
638 if (it
->i
== it
->n
/* USED WHOLE BLOCK, SO THERE MIGHT BE MORE */
639 && it
->i_div
< it
->nii
) { /* CONTINUE TO NEXT BLOCK */
640 it
->n
=read_imdiv(ifile
,it
->im
,div
,it
->i_div
,it
->ntop
); /*READ NEXT BLOCK*/
641 it
->i
=0; /* PROCESS IT FROM ITS START */
644 } while (POP_ITERATOR_STACK(it
)); /* IF STACK EXHAUSTED, EXIT */
645 if (!it0
) /* FREE THE ITERATOR WE CREATED. NO NEED TO RETURN IT TO USER */
646 free_interval_iterator(it
);
647 it
=NULL
; /* ITERATOR IS EXHAUSTED */
649 finally_return_result
:
650 #if defined(ALL_POSITIVE_ORIENTATION) || defined(MERGE_INTERVAL_ORIENTATIONS)
651 reorient_intervals(ibuf
,buf
,ori_sign
); /* REORIENT INTERVALS TO MATCH QUERY ORI */
653 *p_nreturn
=ibuf
; /* #INTERVALS FOUND IN THIS PASS */
654 *it_return
=it
; /* HAND BACK ITERATOR FOR CONTINUING THE SEARCH, IF ANY */
655 return 0; /* SIGNAL THAT NO ERROR OCCURRED */
656 handle_malloc_failure
:
665 /* FUNCTIONS FOR READING AND WRITING OF THE BINARY DATABASE FILES */
667 int write_padded_binary(IntervalMap im
[],int n
,int div
,FILE *ifile
)
670 fwrite(im
,sizeof(IntervalMap
),n
,ifile
); /* SAVE THE ACTUAL DATA */
673 npad
=div
-npad
; /* #ITEMS NEEDED TO PAD TO AN EXACT MULTIPLE OF div */
674 for (i
=0;i
<npad
;i
++) /* GUARANTEED im HAS AT LEAST ONE RECORD */
675 fwrite(im
,sizeof(IntervalMap
),1,ifile
); /*THIS IS JUST PADDING */
677 return n
+npad
; /* #RECORDS SAVED */
681 int repack_subheaders(IntervalMap im
[],int n
,int div
,
682 SublistHeader
*subheader
,int nlists
)
684 int i
,j
,*sub_map
=NULL
;
685 SublistHeader
*sub_pack
=NULL
;
687 CALLOC(sub_map
,nlists
,int);
688 CALLOC(sub_pack
,nlists
,SublistHeader
);
689 for (i
=j
=0;i
<nlists
;i
++) { /* PLACE SUBLISTS W/ len>div AT FRONT */
690 if (subheader
[i
].len
>div
) {
691 memcpy(sub_pack
+j
,subheader
+i
,sizeof(SublistHeader
));
696 for (i
=0;i
<nlists
;i
++) { /* PLACE SUBLISTS W/ len<=div AFTERWARDS */
697 if (subheader
[i
].len
<=div
) {
698 memcpy(sub_pack
+j
,subheader
+i
,sizeof(SublistHeader
));
703 for (i
=0;i
<n
;i
++) /* ADJUST im[].sublist TO THE NEW LOCATIONS */
704 if (im
[i
].sublist
>=0)
705 im
[i
].sublist
=sub_map
[im
[i
].sublist
];
706 memcpy(subheader
,sub_pack
,nlists
*sizeof(SublistHeader
)); /* SAVE REORDERED LIST*/
711 handle_malloc_failure
:
716 int write_binary_index(IntervalMap im
[],int n
,int div
,FILE *ifile
)
719 for (i
=0;i
<n
;i
+=div
) {
720 #ifdef MERGE_INTERVAL_ORIENTATIONS
721 if (im
[i
].start
>=0) /* FORWARD ORI */
723 fwrite(&(im
[i
].start
),sizeof(int),1,ifile
); /*SAVE start */
724 #ifdef MERGE_INTERVAL_ORIENTATIONS
725 else { /* REVERSE ORI */
727 fwrite(&j
,sizeof(int),1,ifile
); /*SAVE start */
733 #ifdef MERGE_INTERVAL_ORIENTATIONS
734 if (im
[j
].start
>=0) /* FORWARD ORI */
736 fwrite(&(im
[j
].end
),sizeof(int),1,ifile
); /*SAVE end */
737 #ifdef MERGE_INTERVAL_ORIENTATIONS
738 else { /* REVERSE ORI */
740 fwrite(&j
,sizeof(int),1,ifile
); /*SAVE end */
750 char *write_binary_files(IntervalMap im
[],int n
,int ntop
,int div
,
751 SublistHeader
*subheader
,int nlists
,char filestem
[])
755 FILE *ifile
=NULL
,*ifile_subheader
=NULL
;
756 SublistHeader sh_tmp
;
757 static char err_msg
[1024];
759 if (nlists
>0 /* REPACK SMALL SUBLISTS TO END */
760 && repack_subheaders(im
,n
,div
,subheader
,nlists
)
761 == FIND_FILE_MALLOC_ERR
) {
762 sprintf(err_msg
,"unable to malloc %d subheaders",nlists
);
765 sprintf(path
,"%s.subhead",filestem
); /* SAVE THE SUBHEADER LIST */
766 ifile_subheader
=fopen(path
,"wb"); /* binary file */
767 if (!ifile_subheader
) {
768 sprintf(err_msg
,"unable to open file %s for writing",path
);
771 sprintf(path
,"%s.idb",filestem
); /* SAVE THE DATABASE */
772 ifile
=fopen(path
,"wb"); /* binary file */
774 sprintf(err_msg
,"unable to open file %s for writing",path
);
777 npad
=write_padded_binary(im
,ntop
,div
,ifile
); /* WRITE THE TOP LEVEL LIST */
778 for (i
=0;i
<nlists
;i
++) {
779 sh_tmp
.start
=npad
; /* FILE LOCATION WHERE THIS SUBLIST STORED */
780 sh_tmp
.len
=subheader
[i
].len
; /* SAVE THE TRUE SUBLIST LENGTH, UNPADDED */
781 fwrite(&sh_tmp
,sizeof(SublistHeader
),1,ifile_subheader
);
782 if (subheader
[i
].len
>div
) /* BIG LIST: PAD TO EXACT MULTIPLE OF div */
783 npad
+=write_padded_binary(im
+subheader
[i
].start
,subheader
[i
].len
,div
,ifile
);
784 else { /* SMALL LIST: SAVE W/O PADDING */
785 fwrite(im
+subheader
[i
].start
,sizeof(IntervalMap
),subheader
[i
].len
,ifile
);
786 npad
+=subheader
[i
].len
;
790 fclose(ifile_subheader
);
792 sprintf(path
,"%s.index",filestem
); /* SAVE THE COMPACTED INDEX */
793 ifile
=fopen(path
,"wb"); /* binary file */
795 sprintf(err_msg
,"unable to open file %s for writing",path
);
798 nii
=write_binary_index(im
,ntop
,div
,ifile
);
799 for (i
=0;i
<nlists
;i
++) /* ALSO STORE INDEX DATA FOR BIG SUBLISTS */
800 if (subheader
[i
].len
>div
)
801 nii
+=write_binary_index(im
+subheader
[i
].start
,subheader
[i
].len
,div
,ifile
);
804 sprintf(path
,"%s.size",filestem
); /* SAVE BASIC SIZE INFO*/
805 ifile
=fopen(path
,"w"); /* text file */
807 sprintf(err_msg
,"unable to open file %s for writing",path
);
810 fprintf(ifile
,"%d %d %d %d %d\n",n
,ntop
,div
,nlists
,nii
);
813 return NULL
; /* RETURN CODE SIGNALS SUCCESS!! */
818 IntervalDBFile
*read_binary_files(char filestem
[],char err_msg
[],
819 int subheader_nblock
)
821 int n
,ntop
,div
,nlists
,nii
;
823 IntervalIndex
*ii
=NULL
;
824 SublistHeader
*subheader
=NULL
;
825 IntervalDBFile
*idb_file
=NULL
;
828 sprintf(path
,"%s.size",filestem
); /* READ BASIC SIZE INFO*/
829 ifile
=fopen(path
,"r"); /* text file */
832 sprintf(err_msg
,"unable to open file %s",path
);
835 fscanf(ifile
,"%d %d %d %d %d",&n
,&ntop
,&div
,&nlists
,&nii
);
838 CALLOC(ii
,nii
+1,IntervalIndex
);
840 sprintf(path
,"%s.index",filestem
); /* READ THE COMPACTED INDEX */
841 ifile
=fopen(path
,"rb"); /* binary file */
844 sprintf(err_msg
,"unable to open file %s",path
);
847 fread(ii
,sizeof(IntervalIndex
),nii
,ifile
);
851 CALLOC(idb_file
,1,IntervalDBFile
);
853 sprintf(path
,"%s.subhead",filestem
); /* SAVE THE SUBHEADER LIST */
854 ifile
=fopen(path
,"rb"); /* binary file */
857 sprintf(err_msg
,"unable to open file %s",path
);
860 #ifdef ON_DEMAND_SUBLIST_HEADER
861 CALLOC(subheader
,subheader_nblock
,SublistHeader
);
862 idb_file
->subheader_file
.subheader
=subheader
;
863 idb_file
->subheader_file
.nblock
=subheader_nblock
;
864 idb_file
->subheader_file
.start
= -subheader_nblock
; /* NO BLOCK LOADED */
865 idb_file
->subheader_file
.ifile
=ifile
;
867 CALLOC(subheader
,nlists
,SublistHeader
); /* LOAD THE ENTIRE SUBHEADER */
868 fread(subheader
,sizeof(SublistHeader
),nlists
,ifile
); /*SAVE LIST */
875 idb_file
->nlists
=nlists
;
877 idb_file
->nii
=ntop
/div
;
878 if (ntop
%div
) /* INDEX IS PADDED TO EXACT MULTIPLE OF div */
879 idb_file
->nii
++; /* ONE EXTRA ENTRY FOR PARTIAL BLOCK */
881 idb_file
->subheader
=subheader
;
882 sprintf(path
,"%s.idb",filestem
); /* OPEN THE DATABASE */
883 idb_file
->ifile_idb
=fopen(path
,"rb"); /* binary file */
884 if (!idb_file
->ifile_idb
) {
886 sprintf(err_msg
,"unable to open file %s",path
);
891 handle_malloc_failure
:
892 FREE(ii
); /* DUMP OUR MEMORY */
900 int free_interval_dbfile(IntervalDBFile
*db_file
)
902 if (db_file
->ifile_idb
)
903 fclose(db_file
->ifile_idb
);
904 #ifdef ON_DEMAND_SUBLIST_HEADER
905 if (db_file
->subheader_file
.ifile
)
906 fclose(db_file
->subheader_file
.ifile
);
909 FREE(db_file
->subheader
);
917 int save_text_file(char filestem
[],char basestem
[],
918 char err_msg
[],FILE *ofile
)
920 int i
,n
,ntop
,div
,nlists
,nii
,npad
;
924 SublistHeader subheader
;
927 sprintf(path
,"%s.size",filestem
); /* READ BASIC SIZE INFO*/
928 ifile
=fopen(path
,"r"); /* text file */
930 goto unable_to_open_file
;
931 if (5!=fscanf(ifile
,"%d %d %d %d %d",&n
,&ntop
,&div
,&nlists
,&nii
))
932 goto fread_error_occurred
;
935 if (npad
>0) /* PAD TO AN EXACT MULTIPLE OF div */
936 npad
=ntop
+(div
-npad
);
937 else /* AN EXACT MULTIPLE OF div, SO NO PADDING */
940 if (fprintf(ofile
,"SIZE\t%s\t%d %d %d %d %d\n",
941 basestem
,n
,ntop
,div
,nlists
,nii
)<0)
942 goto write_error_occurred
;
945 sprintf(path
,"%s.index",filestem
); /* READ THE COMPACTED INDEX */
946 ifile
=fopen(path
,"rb"); /* binary file */
948 goto unable_to_open_file
;
949 for (i
=0;i
<nii
;i
++) {
950 if (1!=fread(&ii
,sizeof(IntervalIndex
),1,ifile
))
951 goto fread_error_occurred
;
952 if (fprintf(ofile
,"I %d %d\n",ii
.start
,ii
.end
)<0)
953 goto write_error_occurred
;
959 sprintf(path
,"%s.subhead",filestem
); /* READ THE SUBHEADER LIST */
960 ifile
=fopen(path
,"rb"); /* binary file */
962 goto unable_to_open_file
;
963 for (i
=0;i
<nlists
;i
++) {
964 if (1!=fread(&subheader
,sizeof(SublistHeader
),1,ifile
))
965 goto fread_error_occurred
;
966 if (fprintf(ofile
,"S %d %d\n",subheader
.start
,subheader
.len
)<0)
967 goto write_error_occurred
;
968 npad
=subheader
.start
+subheader
.len
;
974 sprintf(path
,"%s.idb",filestem
); /* READ THE DATABASE */
975 ifile
=fopen(path
,"rb"); /* binary file */
977 goto unable_to_open_file
;
978 for (i
=0;i
<npad
;i
++) {
979 if (1!=fread(&im
,sizeof(IntervalMap
),1,ifile
))
980 goto fread_error_occurred
;
981 if (fprintf(ofile
,"M %d %d %d %d %d %d\n",im
.start
,im
.end
,
982 im
.target_id
,im
.target_start
,
983 im
.target_end
,im
.sublist
)<0)
984 goto write_error_occurred
;
988 return 0; /* INDICATES NO ERROR OCCURRED */
991 sprintf(err_msg
,"unable to open file %s",path
);
993 fread_error_occurred
:
995 sprintf(err_msg
,"error or EOF reading file %s",path
);
997 write_error_occurred
:
999 sprintf(err_msg
,"error writing output file! out of disk space?");
1005 int text_file_to_binaries(FILE *infile
,char buildpath
[],char err_msg
[])
1007 int i
,n
,ntop
,div
,nlists
,nii
,npad
;
1008 char path
[2048],line
[32768],filestem
[2048];
1011 SublistHeader subheader
;
1014 if (NULL
==fgets(line
,32767,infile
))
1015 goto fread_error_occurred
;
1016 if (6!=sscanf(line
,"SIZE\t%s\t%d %d %d %d %d",
1017 filestem
,&n
,&ntop
,&div
,&nlists
,&nii
))
1018 goto fread_error_occurred
;
1019 sprintf(path
,"%s%s.size",buildpath
,filestem
); /* SAVE BASIC SIZE INFO*/
1020 ifile
=fopen(path
,"w"); /* text file */
1022 goto unable_to_open_file
;
1023 if (fprintf(ifile
,"%d %d %d %d %d\n",n
,ntop
,div
,nlists
,nii
)<0)
1024 goto write_error_occurred
;
1027 if (npad
>0) /* PAD TO AN EXACT MULTIPLE OF div */
1028 npad
=ntop
+(div
-npad
);
1029 else /* AN EXACT MULTIPLE OF div, SO NO PADDING */
1033 sprintf(path
,"%s%s.index",buildpath
,filestem
); /* SAVE INDEX INFO*/
1034 ifile
=fopen(path
,"wb"); /* binary file */
1036 goto unable_to_open_file
;
1037 for (i
=0;i
<nii
;i
++) {
1038 if (NULL
==fgets(line
,32767,infile
))
1039 goto fread_error_occurred
;
1040 if (2!=sscanf(line
,"I %d %d",&(ii
.start
),&(ii
.end
)))
1041 goto fread_error_occurred
;
1042 if (1!=fwrite(&ii
,sizeof(IntervalIndex
),1,ifile
))
1043 goto write_error_occurred
;
1049 sprintf(path
,"%s%s.subhead",buildpath
,filestem
); /* SAVE THE SUBHEADER LIST */
1050 ifile
=fopen(path
,"wb"); /* binary file */
1052 goto unable_to_open_file
;
1053 for (i
=0;i
<nlists
;i
++) {
1054 if (NULL
==fgets(line
,32767,infile
))
1055 goto fread_error_occurred
;
1056 if (2!=sscanf(line
,"S %d %d",&(subheader
.start
),&(subheader
.len
)))
1057 goto fread_error_occurred
;
1058 if (1!=fwrite(&subheader
,sizeof(SublistHeader
),1,ifile
))
1059 goto write_error_occurred
;
1060 npad
=subheader
.start
+subheader
.len
;
1065 sprintf(path
,"%s%s.idb",buildpath
,filestem
); /* SAVE THE ACTUAL INTERVAL DB*/
1066 ifile
=fopen(path
,"wb"); /* binary file */
1068 goto unable_to_open_file
;
1069 for (i
=0;i
<npad
;i
++) {
1070 if (NULL
==fgets(line
,32767,infile
))
1071 goto fread_error_occurred
;
1072 if (6!=sscanf(line
,"M %d %d %d %d %d %d",&(im
.start
),&(im
.end
),
1073 &(im
.target_id
),&(im
.target_start
),
1074 &(im
.target_end
),&(im
.sublist
)))
1075 goto fread_error_occurred
;
1076 if (1!=fwrite(&im
,sizeof(IntervalMap
),1,ifile
))
1077 goto write_error_occurred
;
1081 return 0; /* INDICATES NO ERROR OCCURRED */
1082 unable_to_open_file
:
1084 sprintf(err_msg
,"unable to open file %s",path
);
1086 fread_error_occurred
:
1088 sprintf(err_msg
,"error or EOF reading input file");
1090 write_error_occurred
:
1092 sprintf(err_msg
,"error writing file %s! out of disk space?",