1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
21 ***********************************************************************/
25 * determine record format by sampling data in <buf,size>
26 * total is the total file size, <=0 if not available
28 * -1 could not determine
29 * RECTYPE(r)==REC_fixed fixed length REC_F_SIZE(r)
30 * RECTYPE(r)==REC_delimited variable length delimiter=REC_D_DELIMITER(r)
31 * RECTYPE(r)==REC_variable variable length
38 unsigned int rep
[4 * 1024];
39 unsigned int hit
[UCHAR_MAX
+ 1];
43 recfmt(const void* buf
, size_t size
, off_t total
)
45 register unsigned char* s
;
46 register unsigned char* t
;
48 register unsigned int* h
;
49 register unsigned int i
;
58 static unsigned char terminators
[] = { '\n', 0x15, 0x25 };
64 s
= (unsigned char*)buf
;
66 while ((k
= (t
- s
)) >= 4 && !s
[2] && !s
[3])
68 if ((i
= (s
[0]<<8)|s
[1]) > k
)
72 if (!k
|| size
> 2 * k
)
73 return REC_V_TYPE(4, 0, 2, 0, 1);
74 s
= (unsigned char*)buf
;
77 * check for terminated records
80 for (i
= 0; i
< elementsof(terminators
); i
++)
81 if ((t
= (unsigned char*)memchr((void*)s
, k
= terminators
[i
], size
/ 2)) && (n
= t
- s
+ 1) > 1 && (total
<= 0 || !(total
% n
)))
83 for (j
= n
- 1; j
< size
; j
+= n
)
90 return REC_D_TYPE(terminators
[i
]);
94 * check fixed length record frequencies
97 if (!(q
= newof(0, Sample_t
, 1, 0)))
100 for (i
= 0; i
< size
; i
++)
105 if (m
< elementsof(q
->rep
))
115 for (i
= x
; i
> 1; i
--)
117 if ((total
<= 0 || !(total
% i
)) && q
->rep
[i
] > q
->rep
[n
])
121 for (j
= i
; j
< size
- i
; j
+= i
)
122 for (k
= 0; k
< i
; k
++)
123 if (s
[j
+ k
] != s
[j
+ k
- i
])
125 g
= (((g
* 100) / i
) * 100) / q
->rep
[i
];
133 if (m
<= 1 && n
<= 2 && total
> 1 && total
< 256)
136 for (i
= 0; i
< size
; i
++)
137 for (j
= 0; j
< elementsof(terminators
); j
++)
138 if (s
[i
] == terminators
[j
])
143 return n
? REC_F_TYPE(n
) : REC_N_TYPE();
154 if (!(s
= sfreserve(sfstdin
, SF_UNBOUND
, 0)))
156 sfprintf(sfstderr
, "read error\n");
159 size
= sfvalue(sfstdin
);
160 total
= sfsize(sfstdin
);
161 sfprintf(sfstdout
, "%d\n", recfmt(s
, size
, total
));