7 /****************************
8 * implementation dependent *
9 ****************************/
11 /* template for workfiles (8.3 format) */
12 #define FNAME "_sort%03d.dat"
15 /* comparison operators */
16 #define compLT(x,y) (x < y)
17 #define compGT(x,y) (x > y)
19 /* define the record to be sorted here */
22 typedef struct recTypeTag {
23 keyType key; /* sort key for record */
25 char data[LRECL-sizeof(keyType)]; /* other fields */
29 /******************************
30 * implementation independent *
31 ******************************/
33 typedef enum {false, true} bool;
35 typedef struct tmpFileTag {
36 FILE *fp; /* file pointer */
37 char name[LNAME]; /* filename */
38 recType rec; /* last record read */
39 int dummy; /* number of dummy runs */
40 bool eof; /* end-of-file flag */
41 bool eor; /* end-of-run flag */
42 bool valid; /* true if rec is valid */
43 int fib; /* ideal fibonacci number */
46 static tmpFileType **file; /* array of file info for tmp files */
47 static int nTmpFiles; /* number of tmp files */
48 static char *ifName; /* input filename */
49 static char *ofName; /* output filename */
51 static int level; /* level of runs */
52 static int nNodes; /* number of nodes for selection tree */
54 void deleteTmpFiles(void) {
57 /* delete merge files and free resources */
59 for (i = 0; i < nTmpFiles; i++) {
61 if (file[i]->fp) fclose(file[i]->fp);
62 if (*file[i]->name) remove(file[i]->name);
70 void termTmpFiles(int rc) {
77 /* file[T] contains results */
78 fileT = nTmpFiles - 1;
79 fclose(file[fileT]->fp); file[fileT]->fp = NULL;
80 if (rename(file[fileT]->name, ofName)) {
85 *file[fileT]->name = 0;
90 void cleanExit(int rc) {
92 /* cleanup tmp files and exit */
97 void *safeMalloc(size_t size) {
100 /* safely allocate memory and initialize to zero */
101 if ((p = calloc(1, size)) == NULL) {
102 printf("error: malloc failed, size = %d\n", size);
108 void initTmpFiles(void) {
110 tmpFileType *fileInfo;
112 /* initialize merge files */
113 if (nTmpFiles < 3) nTmpFiles = 3;
114 file = safeMalloc(nTmpFiles * sizeof(tmpFileType*));
115 fileInfo = safeMalloc(nTmpFiles * sizeof(tmpFileType));
116 for (i = 0; i < nTmpFiles; i++) {
117 file[i] = fileInfo + i;
118 sprintf(file[i]->name, FNAME, i);
119 if ((file[i]->fp = fopen(file[i]->name, "w+b")) == NULL) {
126 recType *readRec(void) {
128 typedef struct iNodeTag { /* internal node */
129 struct iNodeTag *parent;/* parent of internal node */
130 struct eNodeTag *loser; /* external loser */
133 typedef struct eNodeTag { /* external node */
134 struct iNodeTag *parent;/* parent of external node */
135 recType rec; /* input record */
136 int run; /* run number */
137 bool valid; /* input record is valid */
140 typedef struct nodeTag {
141 iNodeType i; /* internal node */
142 eNodeType e; /* external node */
145 static nodeType *node; /* array of selection tree nodes */
146 static eNodeType *win; /* new winner */
147 static FILE *ifp; /* input file */
148 static bool eof; /* true if end-of-file, input */
149 static int maxRun; /* maximum run number */
150 static int curRun; /* current run number */
151 iNodeType *p; /* pointer to internal nodes */
152 static bool lastKeyValid; /* true if lastKey is valid */
153 static keyType lastKey; /* last key written */
155 /* read next record using replacement selection */
157 /* check for first call */
161 if (nNodes < 2) nNodes = 2;
162 node = safeMalloc(nNodes * sizeof(nodeType));
163 for (i = 0; i < nNodes; i++) {
164 node[i].i.loser = &node[i].e;
165 node[i].i.parent = &node[i/2].i;
166 node[i].e.parent = &node[(nNodes + i)/2].i;
168 node[i].e.valid = false;
171 lastKeyValid = false;
173 if ((ifp = fopen(ifName, "rb")) == NULL) {
174 printf("error: file %s, unable to open\n", ifName);
181 /* replace previous winner with new record */
183 if (fread(&win->rec, sizeof(recType), 1, ifp) == 1) {
184 if ((!lastKeyValid || compLT(win->rec.key, lastKey))
185 && (++win->run > maxRun))
188 } else if (feof(ifp)) {
192 win->run = maxRun + 1;
199 win->run = maxRun + 1;
202 /* adjust loser and winner pointers */
207 if (p->loser->run < win->run) {
209 } else if (p->loser->run == win->run) {
210 if (p->loser->valid && win->valid) {
211 if (compLT(p->loser->rec.key, win->rec.key))
218 /* p should be winner */
226 } while (p != &node[0].i);
229 if (win->run != curRun) {
230 /* win->run = curRun + 1 */
231 if (win->run > maxRun) {
239 /* output top of tree */
241 lastKey = win->rec.key;
248 void makeRuns(void) {
249 recType *win; /* winner */
250 int fileT; /* last file */
251 int fileP; /* next to last file */
252 int j; /* selects file[j] */
255 /* Make initial runs using replacement selection.
256 * Runs are written using a Fibonacci distintbution.
259 /* initialize file structures */
260 fileT = nTmpFiles - 1;
262 for (j = 0; j < fileT; j++) {
266 file[fileT]->fib = 0;
267 file[fileT]->dummy = 0;
278 for (j = 0; win && j <= fileP; j++) {
282 if (file[j]->valid) {
283 if (!compLT(win->key, file[j]->rec.key)) {
284 /* append to an existing run */
286 } else if (file[j]->dummy) {
287 /* start a new run */
292 /* first run in file */
302 if (fwrite(win, sizeof(recType), 1, file[j]->fp) != 1) {
306 file[j]->rec.key = win->key;
307 file[j]->valid = true;
308 if ((win = readRec()) == NULL) break;
309 if (compLT(win->key, file[j]->rec.key)) break;
314 /* if no room for runs, up a level */
319 for (j = 0; j <= fileP; j++) {
320 file[j]->dummy = t + file[j+1]->fib - file[j]->fib;
321 file[j]->fib = t + file[j+1]->fib;
327 void rewindFile(int j) {
328 /* rewind file[j] and read in first record */
329 file[j]->eor = false;
330 file[j]->eof = false;
332 if (fread(&file[j]->rec, sizeof(recType), 1, file[j]->fp) != 1) {
333 if (feof(file[j]->fp)) {
343 void mergeSort(void) {
349 /* polyphase merge sort */
351 fileT = nTmpFiles - 1;
354 /* prime the files */
355 for (j = 0; j < fileT; j++) {
359 /* each pass through loop merges one run */
368 for (j = 0; j <= fileP; j++) {
369 if (!file[j]->dummy) {
371 if (!file[j]->eof) anyRuns = true;
379 /* merge 1 run file[0]..file[P] --> file[T] */
382 /* each pass thru loop writes 1 record to file[fileT] */
384 /* find smallest key */
386 for (j = 0; j <= fileP; j++) {
387 if (file[j]->eor) continue;
388 if (file[j]->dummy) continue;
390 (k != j && compGT(file[k]->rec.key, file[j]->rec.key)))
395 /* write record[k] to file[fileT] */
396 if (fwrite(&file[k]->rec, sizeof(recType), 1,
397 file[fileT]->fp) != 1) {
402 /* replace record[k] */
403 lastKey = file[k]->rec.key;
404 if (fread(&file[k]->rec, sizeof(recType), 1,
406 /* check for end of run on file[s] */
407 if (compLT(file[k]->rec.key, lastKey))
409 } else if (feof(file[k]->fp)) {
419 for (j = 0; j <= fileP; j++) {
420 if (file[j]->dummy) file[j]->dummy--;
421 if (!file[j]->eof) file[j]->eor = false;
424 } else if (allDummies) {
425 for (j = 0; j <= fileP; j++)
427 file[fileT]->dummy++;
431 if (file[fileP]->eof && !file[fileP]->dummy) {
432 /* completed a fibonocci-level */
435 /* we're done, file[fileT] contains data */
439 /* fileP is exhausted, reopen as new */
440 fclose(file[fileP]->fp);
441 if ((file[fileP]->fp = fopen(file[fileP]->name, "w+b"))
446 file[fileP]->eof = false;
447 file[fileP]->eor = false;
451 /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */
453 memmove(file + 1, file, fileT * sizeof(tmpFileType*));
457 for (j = 0; j <= fileP; j++)
458 if (!file[j]->eof) file[j]->eor = false;
473 int main(int argc, char *argv[]) {
477 * ext ifName ofName nTmpFiles nNodes
479 * ext in.dat out.dat 5 2000
480 * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat
483 printf("%s ifName ofName nTmpFiles nNodes\n", argv[0]);
489 nTmpFiles = atoi(argv[3]);
490 nNodes = atoi(argv[4]);
492 printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n",
493 nTmpFiles, nNodes, sizeof(recType));