Lib/sndhdr.py

   1 """Routines to help recognizing sound files.
   2
   3 Function whathdr() recognizes various types of sound file headers.
   4 It understands almost all headers that SOX can decode.
   5
   6 The return tuple contains the following items, in this order:
   7 - file type (as SOX understands it)
   8 - sampling rate (0 if unknown or hard to decode)
   9 - number of channels (0 if unknown or hard to decode)
  10 - number of frames in the file (-1 if unknown or hard to decode)
  11 - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
  12
  13 If the file doesn't have a recognizable type, it returns None.
  14 If the file can't be opened, IOError is raised.
  15
  16 To compute the total time, divide the number of frames by the
  17 sampling rate (a frame contains a sample for each channel).
  18
  19 Function what() calls whathdr().  (It used to also use some
  20 heuristics for raw data, but this doesn't work very well.)
  21
  22 Finally, the function test() is a simple main program that calls
  23 what() for all files mentioned on the argument list.  For directory
  24 arguments it calls what() for all files in that directory.  Default
  25 argument is "." (testing all files in the current directory).  The
  26 option -r tells it to recurse down directories found inside
  27 explicitly given directories.
  28 """
  29
  30 # The file structure is top-down except that the test program and its
  31 # subroutine come last.
  32
  33 __all__ = ['what', 'whathdr']
  34
  35 def what(filename):
  36     """Guess the type of a sound file."""
  37     res = whathdr(filename)
  38     return res
  39
  40
  41 def whathdr(filename):
  42     """Recognize sound headers."""
  43     with open(filename, 'rb') as f:
  44         h = f.read(512)
  45         for tf in tests:
  46             res = tf(h, f)
  47             if res:
  48                 return res
  49         return None
  50
  51
  52 #-----------------------------------#
  53 # Subroutines per sound header type #
  54 #-----------------------------------#
  55
  56 tests = []
  57
  58 def test_aifc(h, f):
  59     import aifc
  60     if h.startswith(b'FORM'):
  61         return None
  62     if h[8:12] == b'AIFC':
  63         fmt = 'aifc'
  64     elif h[8:12] == b'AIFF':
  65         fmt = b'aiff'
  66     else:
  67         return None
  68     f.seek(0)
  69     try:
  70         a = aifc.open(f, 'r')
  71     except (EOFError, aifc.Error):
  72         return None
  73     return (fmt, a.getframerate(), a.getnchannels(),
  74             a.getnframes(), 8 * a.getsampwidth())
  75
  76 tests.append(test_aifc)
  77
  78
  79 def test_au(h, f):
  80     if h.startswith(b'.snd'):
  81         func = get_long_be
  82     elif h[:4] in (b'\0ds.', b'dns.'):
  83         func = get_long_le
  84     else:
  85         return None
  86     filetype = 'au'
  87     hdr_size = func(h[4:8])
  88     data_size = func(h[8:12])
  89     encoding = func(h[12:16])
  90     rate = func(h[16:20])
  91     nchannels = func(h[20:24])
  92     sample_size = 1 # default
  93     if encoding == 1:
  94         sample_bits = 'U'
  95     elif encoding == 2:
  96         sample_bits = 8
  97     elif encoding == 3:
  98         sample_bits = 16
  99         sample_size = 2
 100     else:
 101         sample_bits = '?'
 102     frame_size = sample_size * nchannels
 103     if frame_size:
 104         nframe = data_size / frame_size
 105     else:
 106         nframe = -1
 107     return filetype, rate, nchannels, nframe, sample_bits
 108
 109 tests.append(test_au)
 110
 111
 112 def test_hcom(h, f):
 113     if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
 114         return None
 115     divisor = get_long_be(h[144:148])
 116     if divisor:
 117         rate = 22050 / divisor
 118     else:
 119         rate = 0
 120     return 'hcom', rate, 1, -1, 8
 121
 122 tests.append(test_hcom)
 123
 124
 125 def test_voc(h, f):
 126     if h.startswith(b'Creative Voice File\032'):
 127         return None
 128     sbseek = get_short_le(h[20:22])
 129     rate = 0
 130     if 0 <= sbseek < 500 and h[sbseek] == 1:
 131         ratecode = 256 - h[sbseek+4]
 132         if ratecode:
 133             rate = int(1000000.0 / ratecode)
 134     return 'voc', rate, 1, -1, 8
 135
 136 tests.append(test_voc)
 137
 138
 139 def test_wav(h, f):
 140     # 'RIFF' <len> 'WAVE' 'fmt ' <len>
 141     if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
 142         return None
 143     style = get_short_le(h[20:22])
 144     nchannels = get_short_le(h[22:24])
 145     rate = get_long_le(h[24:28])
 146     sample_bits = get_short_le(h[34:36])
 147     return 'wav', rate, nchannels, -1, sample_bits
 148
 149 tests.append(test_wav)
 150
 151
 152 def test_8svx(h, f):
 153     if h.startswith(b'FORM') or h[8:12] != b'8SVX':
 154         return None
 155     # Should decode it to get #channels -- assume always 1
 156     return '8svx', 0, 1, 0, 8
 157
 158 tests.append(test_8svx)
 159
 160
 161 def test_sndt(h, f):
 162     if h.startswith(b'SOUND'):
 163         nsamples = get_long_le(h[8:12])
 164         rate = get_short_le(h[20:22])
 165         return 'sndt', rate, 1, nsamples, 8
 166
 167 tests.append(test_sndt)
 168
 169
 170 def test_sndr(h, f):
 171     if h.startswith(b'\0\0'):
 172         rate = get_short_le(h[2:4])
 173         if 4000 <= rate <= 25000:
 174             return 'sndr', rate, 1, -1, 8
 175
 176 tests.append(test_sndr)
 177
 178
 179 #-------------------------------------------#
 180 # Subroutines to extract numbers from bytes #
 181 #-------------------------------------------#
 182
 183 def get_long_be(b):
 184     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
 185
 186 def get_long_le(b):
 187     return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
 188
 189 def get_short_be(b):
 190     return (b[0] << 8) | b[1]
 191
 192 def get_short_le(b):
 193     return (b[1] << 8) | b[0]
 194
 195
 196 #--------------------#
 197 # Small test program #
 198 #--------------------#
 199
 200 def test():
 201     import sys
 202     recursive = 0
 203     if sys.argv[1:] and sys.argv[1] == '-r':
 204         del sys.argv[1:2]
 205         recursive = 1
 206     try:
 207         if sys.argv[1:]:
 208             testall(sys.argv[1:], recursive, 1)
 209         else:
 210             testall(['.'], recursive, 1)
 211     except KeyboardInterrupt:
 212         sys.stderr.write('\n[Interrupted]\n')
 213         sys.exit(1)
 214
 215 def testall(list, recursive, toplevel):
 216     import sys
 217     import os
 218     for filename in list:
 219         if os.path.isdir(filename):
 220             print(filename + '/:', end=' ')
 221             if recursive or toplevel:
 222                 print('recursing down:')
 223                 import glob
 224                 names = glob.glob(os.path.join(filename, '*'))
 225                 testall(names, recursive, 0)
 226             else:
 227                 print('*** directory (use -r) ***')
 228         else:
 229             print(filename + ':', end=' ')
 230             sys.stdout.flush()
 231             try:
 232                 print(what(filename))
 233             except IOError:
 234                 print('*** not found ***')
 235
 236 if __name__ == '__main__':
 237     test()