"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
30 # The file structure is top-down except that the test program and its
31 # subroutine come last.
__all__ = ['what', 'whathdr']


def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(): returns whatever whathdr() returns for
    *filename* (a tuple describing the header, or None when no header type
    is recognized).
    """
    return whathdr(filename)
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads the start of the file and hands
    it to each function registered in the module-level ``tests`` list, in
    registration order.  Each function is called as ``tf(h, f)`` with the
    header bytes and the open file object.

    Returns the first truthy result, or None when no test recognizes the
    header.  Errors from open() propagate to the caller.
    """
    with open(filename, 'rb') as f:
        # 512 bytes covers every offset the header tests look at.
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    return None
52 #-----------------------------------#
53 # Subroutines per sound header type #
54 #-----------------------------------#
60 if not h
.startswith(b
'FORM'):
62 if h
[8:12] == b
'AIFC':
64 elif h
[8:12] == b
'AIFF':
71 except (EOFError, aifc
.Error
):
73 return (fmt
, a
.getframerate(), a
.getnchannels(),
74 a
.getnframes(), 8 * a
.getsampwidth())
# Register the AIFF/AIFF-C recognizer.
tests.append(test_aifc)
def test_au(h, f):
    """Recognize .au/.snd headers in either byte order.

    *h* is the leading bytes of the file; *f* is unused.  The magic b'.snd'
    selects big-endian fields; b'\\0ds.' and b'dns.' select little-endian
    ones.  Header fields are 32-bit words: data size at offset 8, encoding
    at 12, sample rate at 16 and channel count at 20.

    Returns ('au', rate, nchannels, nframes, sample_bits) or None if the
    magic does not match or the header is shorter than 24 bytes.
    sample_bits is 'U' (u-law), 'A' (A-law), a bit count, or '?' for an
    encoding this function does not know.  nframes is -1 when the data
    size is marked unknown or the frame size is zero.
    """
    if h.startswith(b'.snd'):
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    if len(h) < 24:
        # Truncated header: the fields below would be incomplete.
        return None
    filetype = 'au'
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    # encoding code -> (sample_bits, bytes per sample)
    encodings = {
        1: ('U', 1),    # 8-bit u-law
        2: (8, 1),      # 8-bit linear PCM
        3: (16, 2),     # 16-bit linear PCM
        4: (24, 3),     # 24-bit linear PCM
        5: (32, 4),     # 32-bit linear PCM
        27: ('A', 1),   # 8-bit A-law
    }
    # Unknown encodings keep the historical fallback of one byte per sample.
    sample_bits, sample_size = encodings.get(encoding, ('?', 1))
    frame_size = sample_size * nchannels
    # A data size of 0xFFFFFFFF means "unknown" in the AU format.
    if frame_size and data_size != 0xFFFFFFFF:
        # Floor division keeps the frame count an integer.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return filetype, rate, nchannels, nframe, sample_bits
# Register the .au/.snd recognizer.
tests.append(test_au)
def test_hcom(h, f):
    """Recognize 'hcom' headers.

    *h* is the leading bytes of the file; *f* is unused.  The header must
    have b'FSSD' at offset 65 and b'HCOM' at offset 128.  The sampling rate
    is 22050 divided by the big-endian 32-bit divisor at offset 144.

    Returns ('hcom', rate, 1, -1, 8), with rate 0 when the divisor is zero
    or missing, or None if the magic does not match.
    """
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    # Only decode the divisor when all four of its bytes were read.
    divisor = get_long_be(h[144:148]) if len(h) >= 148 else 0
    if divisor:
        # Floor division keeps the rate an integer.
        rate = 22050 // divisor
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8
# Register the 'hcom' recognizer.
tests.append(test_hcom)
def test_voc(h, f):
    """Recognize Creative Voice File headers.

    *h* is the leading bytes of the file; *f* is unused.  The little-endian
    16-bit value at offset 20 gives the offset of the first data block.
    If that block has type 1, the byte four positions into it is a rate
    code and the sampling rate is 1000000 / (256 - code).

    Returns ('voc', rate, 1, -1, 8), with rate 0 when it cannot be decoded,
    or None if the magic does not match.
    """
    if not h.startswith(b'Creative Voice File\032'):
        return None
    rate = 0
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # The second bound guards against headers cut off before the rate
        # byte; the 500 limit keeps the lookup inside the bytes read.
        if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
            # h[...] is 0..255, so ratecode is 1..256 and never zero.
            ratecode = 256 - h[sbseek + 4]
            rate = 1000000 // ratecode
    return 'voc', rate, 1, -1, 8
# Register the Creative Voice File recognizer.
tests.append(test_voc)
def test_wav(h, f):
    """Recognize RIFF/WAVE headers whose first chunk is 'fmt '.

    *h* is the leading bytes of the file; *f* is unused.  Fields are
    little-endian: channel count at offset 22, sampling rate at 24 and
    bits per sample at 34.

    Returns ('wav', rate, nchannels, -1, sample_bits), or None if the magic
    does not match or the header is shorter than 36 bytes.
    """
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    if len(h) < 36:
        # Truncated 'fmt ' chunk: the fields below would be incomplete.
        return None
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits
# Register the RIFF/WAVE recognizer.
tests.append(test_wav)
153 if not h
.startswith(b
'FORM') or h
[8:12] != b
'8SVX':
155 # Should decode it to get #channels -- assume always 1
156 return '8svx', 0, 1, 0, 8
# Register the '8svx' recognizer.
tests.append(test_8svx)
def test_sndt(h, f):
    """Recognize 'sndt' headers.

    *h* is the leading bytes of the file; *f* is unused.  The header must
    start with b'SOUND'.  The sample count is the little-endian 32-bit
    value at offset 8 and the rate the little-endian 16-bit value at
    offset 20.

    Returns ('sndt', rate, 1, nsamples, 8), or None if the magic does not
    match or the header is shorter than 22 bytes.
    """
    if not h.startswith(b'SOUND') or len(h) < 22:
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8
# Register the 'sndt' recognizer.
tests.append(test_sndt)
def test_sndr(h, f):
    """Recognize 'sndr' headers.

    *h* is the leading bytes of the file; *f* is unused.  The header must
    start with two zero bytes, followed by a little-endian 16-bit rate
    that is only accepted when it lies in 4000..25000.

    Returns ('sndr', rate, 1, -1, 8), or None otherwise (including when
    fewer than four bytes are available).
    """
    if not h.startswith(b'\0\0') or len(h) < 4:
        return None
    rate = get_short_le(h[2:4])
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None
# Register the 'sndr' recognizer.
tests.append(test_sndr)
179 #-------------------------------------------#
180 # Subroutines to extract numbers from bytes #
181 #-------------------------------------------#
def get_long_be(b):
    """Return the first four items of *b* as a big-endian 32-bit integer.

    *b* must be indexable and yield ints (e.g. a bytes object) with at
    least four items; extra items are ignored.
    """
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
def get_long_le(b):
    """Return the first four items of *b* as a little-endian 32-bit integer.

    *b* must be indexable and yield ints (e.g. a bytes object) with at
    least four items; extra items are ignored.
    """
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
def get_short_be(b):
    """Return the first two items of *b* as a big-endian 16-bit integer.

    *b* must be indexable and yield ints (e.g. a bytes object) with at
    least two items; extra items are ignored.
    """
    return (b[0] << 8) | b[1]
def get_short_le(b):
    """Return the first two items of *b* as a little-endian 16-bit integer.

    *b* must be indexable and yield ints (e.g. a bytes object) with at
    least two items; extra items are ignored.
    """
    return (b[1] << 8) | b[0]
196 #--------------------#
197 # Small test program #
198 #--------------------#
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' argument enables recursion into sub-directories.  The
    remaining arguments (default: '.') are passed to testall().  On
    KeyboardInterrupt a notice is written to stderr and the process exits
    with status 1.
    """
    import sys
    # Work on a copy so the caller's sys.argv is left untouched.
    args = sys.argv[1:]
    recursive = 0
    if args and args[0] == '-r':
        del args[0]
        recursive = 1
    try:
        if args:
            testall(args, recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
def testall(list, recursive, toplevel):
    """Print what() for every name in *list*.

    list      -- iterable of file or directory names (the parameter name
                 shadows the builtin; kept for backward compatibility)
    recursive -- true to descend into directories found below the top level
    toplevel  -- true for names given explicitly; such directories are
                 always expanded one level

    Directories are expanded with a '*' glob and processed by a recursive
    call with toplevel false.  Files that cannot be opened are reported as
    '*** not found ***'.
    """
    import sys
    import os
    import glob
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            # Show the name before a possibly slow or failing probe.
            sys.stdout.flush()
            try:
                print(what(filename))
            except IOError:
                print('*** not found ***')
# Run the small test program when executed as a script.
if __name__ == '__main__':
    test()