2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
28 * For further information regarding this notice, see:
30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
33 /* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
34 /**************************************************************
36 * OS Testing - Silicon Graphics, Inc.
38 * TOOL IDENTIFIER : rand_lines
40 * DESCRIPTION : prints lines from a file in random order
43 * rand_lines [-hg][-S seed][-l numlines] [files...]
45 * AUTHOR : Richard Logan
49 * DATE STARTED : 05/94
51 * INPUT SPECIFICATIONS
52 * This tool will print lines of a file in random order.
53 * The max line length is 4096.
54 * The options supported are:
55 * -h This option prints a help message and then exits.
57 * -g This option counts the number of lines in the file
58 * before randomizing. This option overrides the
59 * -l option. Using this option gives the best
60 * randomization, but it requires processing
61 * the file an additional time.
63 * -l numlines : This option randomizes the file in chunks of
64 * numlines lines. The default size is 4096.
66 * -S seed : sets randomization seed to seed.
67 * The default is time(0). If seed is zero, time(0) is used.
69 * file A readable, seekable filename. The cmd allows the user
70 * to specify multiple files, but each file is dealt with
74 * This tool uses a simple algorithm where the file is read.
75 * The offset of each line is placed at a random position in an
76 * array. The array is then processed sequentially: for each element,
77 * the infile line at the stored offset is reread and then printed.
78 * The output is thus infile's lines in random order.
80 * SPECIAL REQUIREMENTS
84 * This should contain the description, author, and date of any
85 * "interesting" modifications (i.e. info that should be helpful in
86 * maintaining/enhancing this tool).
87 * username description
88 * ----------------------------------------------------------------
89 * rrl Creation of program
90 * rrl 06/02 Fixed bug and some cleanup. Changed default chunk
91 * and line size to 4096 characters.
94 * This program cannot deal with non-seekable files like
95 * stdin or a pipe. If more than one file is specified,
96 * each file is randomized one at a time. The max line
97 * length is 4096 characters.
99 **************************************************************/
108 #include "random_range.h"
111 * Structure used to hold file line offset.
/* Prints the usage message to the given stream. */
118 void usage(FILE *stream
);
/*
 * Prints the lines of infile in random order. infile must be seekable;
 * numlines sizes the offsets array used for one randomization pass and
 * seed is handed to random_range_seed().
 */
120 int rnd_file(FILE *infile
, int numlines
, long seed
);
/* Counts the lines in an already open, seekable file, then seeks back to its start. */
121 int get_numlines(FILE *infile
);
/*
 * Stores offset in a randomly chosen unused element of offsets[], which
 * holds size elements. Callers treat a negative return as a fatal error.
 */
122 int rnd_insert(struct offset_t offsets
[], long offset
, int size
);
/* Sizing limits; both values are also reported by the help text. */
124 #define DEF_SIZE 4096 /* default chunk size */
125 #define MAX_LN_SZ 4096 /* max line size */
/*
 * Program name used as the prefix of diagnostic messages. It is assigned
 * from argv[0] (via strrchr() on '/') before the options are parsed.
 */
131 char *Progname
= NULL
;
133 /***********************************************************************
135 ***********************************************************************/
143 long seed
= -1; /* use time as seed */
144 int lsize
= DEF_SIZE
; /* num lines to randomize */
145 int getfilelines
= 0; /* if set, count lines first */
147 if ((Progname
= strrchr(argv
[0], '/')) == NULL
)
152 while ((c
= getopt (argc
, argv
, "hgS:l:")) != EOF
){
159 if ( sscanf(optarg
, "%li", &seed
) != 1 ) {
160 fprintf(stderr
, "%s: --S option argument is invalid\n", Progname
);
165 case 'l': /* number of lines */
166 if ( sscanf(optarg
, "%i", &lsize
) != 1 ) {
167 fprintf(stderr
, "%s: --s option argument is invalid\n", Progname
);
183 if ( optind
+ 1 != argc
) {
184 fprintf(stderr
, "%s: Missing argument.\n", Progname
);
193 if ( strcmp(argv
[argc
-1],"-") == 0 ) {
195 fprintf(stderr
, "%s: Can not support stdin processing.\n",
201 if ((infile
=fopen(argv
[argc
-1], "r")) == NULL
) {
202 fprintf(stderr
, "%s: Unable to open file %s: %s\n",
203 Progname
, argv
[argc
-1], strerror(errno
));
207 if ( getfilelines
) {
208 lsize
=get_numlines(infile
);
211 rnd_file(infile
, lsize
, seed
);
217 /***********************************************************************
218 * Print usage message to stream.
219 ***********************************************************************/
224 "Usage %s [-hg][-S seed][-l numlines] [files...]\n", Progname
);
228 /***********************************************************************
229 * Print help message to stdout.
230 ***********************************************************************/
235 printf("This tool will print lines in random order (max line len %d).\n\
236 -h : print this help and exit\n\
237 -g : count the number of lines in the file before randomizing\n\
238 This option overrides -l option.\n\
239 -l numlines : randoms lines in numlines chuncks (def %d)\n\
240 -S seed : sets seed to seed (def time(0))\n",
241 MAX_LN_SZ
, DEF_SIZE
);
245 /***********************************************************************
246 * Counts the number of lines in an already open file.
247 * Note: File must be seekable (not stdin or a pipe).
248 ***********************************************************************/
253 char line
[MAX_LN_SZ
]; /* max size of a line */
256 while ( fgets(line
, MAX_LN_SZ
, infile
) != NULL
) {
260 /* rewind the file */
261 fseek(infile
, 0, SEEK_SET
);
266 /***********************************************************************
268 * infile must be a seekable file. Thus, it cannot be stdin.
269 * It will read each line in the file, randomly saving the offset
270 * of each line in an array of struct offset_t.
271 * It will then print each line in the order stored in the array.
273 ***********************************************************************/
275 rnd_file(infile
, numlines
, seed
)
277 int numlines
; /* can be more or less than num lines in file */
278 /* most opt randomized when num lines in files */
279 /* or just a bit bigger */
283 char line
[MAX_LN_SZ
]; /* max size of a line */
285 long coffset
; /* current line offset */
287 struct offset_t
*offsets
;
290 if ( numlines
<= 0 ) { /*use default */
295 * Malloc space for numlines copies of the offset_t structure.
296 * This is where the randomization takes place.
298 memsize
= sizeof(struct offset_t
)*numlines
;
300 if ((offsets
=(struct offset_t
*)malloc(memsize
)) == NULL
) {
301 fprintf(stderr
, "Unable to malloc(%d): errno:%d\n", memsize
, errno
);
305 random_range_seed(seed
);
309 while ( ! feof(infile
) ) {
311 fseek(infile
, coffset
, SEEK_SET
);
312 coffset
=ftell(infile
);
313 memset(offsets
, 0, memsize
);
317 * read the file in and place offset of each line randomly
318 * into offsets array. Only numlines lines can be randomized
321 while ( cnt
< numlines
&& fgets(line
, MAX_LN_SZ
, infile
) != NULL
) {
323 if ( rnd_insert(offsets
, coffset
, numlines
) < 0 ) {
324 fprintf(stderr
, "%s:%d rnd_insert() returned -1 (fatal error)!\n",
330 coffset
=ftell(infile
);
338 * print out lines based on offset.
340 for (cnt
=0; cnt
<numlines
; cnt
++) {
342 if ( offsets
[cnt
].used
) {
343 fseek(infile
, offsets
[cnt
].offset
, SEEK_SET
);
344 fgets(line
, MAX_LN_SZ
, infile
);
354 /***********************************************************************
355 * This function randomly inserts offset information into
356 * the offsets array. The array has a size of size.
357 * It will try up to 75 random array indexes before falling back to
358 * the first open array element.
360 ***********************************************************************/
362 rnd_insert(offsets
, offset
, size
)
363 struct offset_t offsets
[];
372 * Loop looking for random unused index.
373 * It will only be attempted 75 times.
375 while ( quick
< 75 ) {
377 rand_num
=random_range(0, size
-1, 1, NULL
);
379 if ( ! offsets
[rand_num
].used
) {
380 offsets
[rand_num
].offset
=offset
;
381 offsets
[rand_num
].used
++;
388 * a randomly chosen index was not found; find the
389 * first open index and use it.
391 for (ind
=0; ind
< size
&& offsets
[ind
].used
!= 0; ind
++)
396 * If called with an array where all offsets are used,
397 * we won't be able to find an open array location.
398 * Thus, return -1 indicating the error.
399 * This should never happen if called correctly.
404 offsets
[ind
].offset
=offset
;
412 /***********************************************************************
414 * CODE NOT TESTED AT ALL - it must be tested before it is used.
416 * This function was written to allow rand_lines to work on non-seekable
419 ***********************************************************************/
421 rnd_stdin(infile
, space
, numlines
, seed
)
423 int space
; /* amount of space to use to read file into memory, */
424 /* randomized and print. randomize in chunks */
425 int numlines
; /* can be more or less than num lines in file */
426 /* most opt randomized when num lines in files */
427 /* or just a bit bigger */
431 char line
[MAX_LN_SZ
]; /* max size of a line */
432 int cnt
; /* offset printer counter */
433 long loffset
; /* last line address */
434 char *buffer
; /* malloc space for file reads */
435 char *rdbuff
; /* where to start read */
436 long stopaddr
; /* end of read space (address)*/
437 int rdsz
; /* amount read */
439 char *chr
; /* buffer processing pointer */
440 char *ptr
; /* printing processing pointer */
441 char *lptr
; /* printing processing pointer */
442 int loopcntl
= 1; /* main loop control flag */
443 struct offset_t
*offsets
; /* pointer to offset space */
444 int memsize
; /* amount of offset space to malloc */
445 int newbuffer
= 1; /* need new buffer */
447 if ( numlines
<= 0 ) { /*use default */
452 * Malloc space for file contents
454 if ((buffer
=(char *)malloc(space
)) == NULL
) {
455 fprintf(stderr
, "Unable to malloc(%d): errno:%d\n", space
, errno
);
460 * Malloc space for numlines copies of the offset_t structure.
461 * This is where the randomization takes place.
463 memsize
= sizeof(struct offset_t
)*numlines
;
465 if ((offsets
=(struct offset_t
*)malloc(memsize
)) == NULL
) {
466 fprintf(stderr
, "Unable to malloc(%d): errno:%d\n", memsize
, errno
);
470 random_range_seed(seed
);
471 rdbuff
= buffer
; /* read into start of buffer */
472 sztord
= space
; /* amount of space left in buffer */
475 * Loop until read doesn't read anything
476 * If last line does not end in newline, it is not printed
480 * read in file up to space size
481 * only works if used as filter.
482 * The code will randomize one read's worth at a time.
483 * If typing in lines, read will read only one line - no randomizing.
487 if ((rdsz
=fread((void *)rdbuff
, sztord
, 1, infile
)) == 0 ) {
488 fprintf(stderr
, "input file is empty, done randomizing\n");
493 stopaddr
= ((long)buffer
+ rdsz
);
495 loffset
= (long)buffer
;
497 while ( ! newbuffer
) {
499 while ( (long)chr
< stopaddr
&& *chr
!= '\n' )
504 if ( (long)chr
>= stopaddr
) {
506 fprintf(stderr
, "end of read in buffer\n");
509 * print out lines based on offset.
511 for (cnt
=0; cnt
<numlines
; cnt
++) {
513 if ( offsets
[cnt
].used
) {
514 ptr
= (char *)offsets
[cnt
].offset
;
516 * copy buffer characters into line for printing
519 while ( *ptr
!= '\n' )
522 printf("%s\n", line
);
527 * move start of partially read line to beginning of buffer
528 * and adjust rdbuff to end of partially read line
530 memcpy((void *)loffset
, buffer
, (stopaddr
- loffset
));
531 rdbuff
= buffer
+ (stopaddr
- loffset
);
532 sztord
= space
- (stopaddr
- loffset
);
537 if ( rnd_insert(offsets
, loffset
, numlines
) < 0 ) {
538 fprintf(stderr
, "%s:%d rnd_insert() returned -1 (fatal error)!\n",