Merged revisions 78818 via svnmerge from
[python/dscho.git] / Lib / sndhdr.py
blob a8e0a05166631aa51804e83f6ed4bdf7e546e4e6
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
# Layout note: definitions appear top-down, except that the test program
# and its helper are placed at the very end of the file.

# Names exported by a star-import of this module.
__all__ = ["what", "whathdr"]
def what(filename):
    """Guess the type of a sound file.

    Simply forwards to whathdr() and hands back its result unchanged:
    the header tuple, or None when no recognizer matched.
    """
    return whathdr(filename)
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads up to the first 512 bytes and
    offers them (together with the open file object) to each function in
    the module-level ``tests`` list, in order.  The first truthy result
    is returned; None is returned when no recognizer accepts the file.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for probe in tests:
            outcome = probe(header, stream)
            if not outcome:
                continue
            return outcome
    return None
#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Recognizer functions.  whathdr() calls each one as tf(h, f) in list order
# and returns the first truthy result, so registration order is significant.
tests = []
58 def test_aifc(h, f):
59 import aifc
60 if h.startswith(b'FORM'):
61 return None
62 if h[8:12] == b'AIFC':
63 fmt = 'aifc'
64 elif h[8:12] == b'AIFF':
65 fmt = b'aiff'
66 else:
67 return None
68 f.seek(0)
69 try:
70 a = aifc.open(f, 'r')
71 except (EOFError, aifc.Error):
72 return None
73 return (fmt, a.getframerate(), a.getnchannels(),
74 a.getnframes(), 8 * a.getsampwidth())
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_aifc)
79 def test_au(h, f):
80 if h.startswith(b'.snd'):
81 func = get_long_be
82 elif h[:4] in (b'\0ds.', b'dns.'):
83 func = get_long_le
84 else:
85 return None
86 filetype = 'au'
87 hdr_size = func(h[4:8])
88 data_size = func(h[8:12])
89 encoding = func(h[12:16])
90 rate = func(h[16:20])
91 nchannels = func(h[20:24])
92 sample_size = 1 # default
93 if encoding == 1:
94 sample_bits = 'U'
95 elif encoding == 2:
96 sample_bits = 8
97 elif encoding == 3:
98 sample_bits = 16
99 sample_size = 2
100 else:
101 sample_bits = '?'
102 frame_size = sample_size * nchannels
103 if frame_size:
104 nframe = data_size / frame_size
105 else:
106 nframe = -1
107 return filetype, rate, nchannels, nframe, sample_bits
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_au)
112 def test_hcom(h, f):
113 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
114 return None
115 divisor = get_long_be(h[144:148])
116 if divisor:
117 rate = 22050 / divisor
118 else:
119 rate = 0
120 return 'hcom', rate, 1, -1, 8
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_hcom)
125 def test_voc(h, f):
126 if h.startswith(b'Creative Voice File\032'):
127 return None
128 sbseek = get_short_le(h[20:22])
129 rate = 0
130 if 0 <= sbseek < 500 and h[sbseek] == 1:
131 ratecode = 256 - h[sbseek+4]
132 if ratecode:
133 rate = int(1000000.0 / ratecode)
134 return 'voc', rate, 1, -1, 8
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_voc)
139 def test_wav(h, f):
140 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
141 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
142 return None
143 style = get_short_le(h[20:22])
144 nchannels = get_short_le(h[22:24])
145 rate = get_long_le(h[24:28])
146 sample_bits = get_short_le(h[34:36])
147 return 'wav', rate, nchannels, -1, sample_bits
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_wav)
152 def test_8svx(h, f):
153 if h.startswith(b'FORM') or h[8:12] != b'8SVX':
154 return None
155 # Should decode it to get #channels -- assume always 1
156 return '8svx', 0, 1, 0, 8
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_8svx)
161 def test_sndt(h, f):
162 if h.startswith(b'SOUND'):
163 nsamples = get_long_le(h[8:12])
164 rate = get_short_le(h[20:22])
165 return 'sndt', rate, 1, nsamples, 8
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_sndt)
170 def test_sndr(h, f):
171 if h.startswith(b'\0\0'):
172 rate = get_short_le(h[2:4])
173 if 4000 <= rate <= 25000:
174 return 'sndr', rate, 1, -1, 8
# Register the recognizer; whathdr() tries the entries of `tests` in list order.
tests.append(test_sndr)
179 #-------------------------------------------#
180 # Subroutines to extract numbers from bytes #
181 #-------------------------------------------#
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    value = 0
    for index in (0, 1, 2, 3):
        value = (value << 8) | b[index]
    return value
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    value = 0
    # Walk from the most significant byte (index 3) down to index 0.
    for index in (3, 2, 1, 0):
        value = (value << 8) | b[index]
    return value
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, high byte first."""
    high = b[0]
    low = b[1]
    return low | (high << 8)
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, low byte first."""
    high = b[1]
    low = b[0]
    return low | (high << 8)
196 #--------------------#
197 # Small test program #
198 #--------------------#
def test():
    """Command-line driver: run testall() on the arguments.

    A leading '-r' argument is removed from sys.argv and turns on
    recursion.  With no remaining arguments the current directory is
    examined.  Ctrl-C prints a notice to stderr and exits with status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        # Drop the option in place so only the targets remain.
        del sys.argv[1:2]
        recursive = 1
    try:
        targets = sys.argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
def testall(list, recursive, toplevel):
    """Print what() for every name in *list*.

    Directories are expanded one level with glob when *recursive* or
    *toplevel* is true; otherwise a hint to use -r is printed.  Files
    that raise IOError are reported as not found.  (The first parameter
    keeps its historical name even though it shadows the builtin.)
    """
    import sys
    import os
    for entry in list:
        if not os.path.isdir(entry):
            print(entry + ':', end=' ')
            # Show the name before what() runs, in case it is slow or fails.
            sys.stdout.flush()
            try:
                print(what(entry))
            except IOError:
                print('*** not found ***')
            continue
        print(entry + '/:', end=' ')
        if not (recursive or toplevel):
            print('*** directory (use -r) ***')
            continue
        print('recursing down:')
        import glob
        children = glob.glob(os.path.join(entry, '*'))
        testall(children, recursive, 0)
# Run the small test program when this file is executed as a script.
if __name__ == "__main__":
    test()