Lib/sndhdr.py

   1 """Routines to help recognize sound files.
   2
   3 Function whathdr() recognizes various types of sound file headers.
   4 It understands almost all headers that SOX can decode.
   5
   6 The return tuple contains the following items, in this order:
   7 - file type (as SOX understands it)
   8 - sampling rate (0 if unknown or hard to decode)
   9 - number of channels (0 if unknown or hard to decode)
  10 - number of frames in the file (-1 if unknown or hard to decode)
  11 - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
  12
  13 If the file doesn't have a recognizable type, it returns None.
  14 If the file can't be opened, IOError is raised.
  15
  16 To compute the total time, divide the number of frames by the
  17 sampling rate (a frame contains a sample for each channel).
  18
  19 Function what() calls whathdr().  (It used to also use some
  20 heuristics for raw data, but this doesn't work very well.)
  21
  22 Finally, the function test() is a simple main program that calls
  23 what() for all files mentioned on the argument list.  For directory
  24 arguments it calls what() for all files in that directory.  Default
  25 argument is "." (testing all files in the current directory).  The
  26 option -r tells it to recurse down directories found inside
  27 explicitly given directories.
  28 """
  29
  30 # The file structure is top-down except that the test program and its
  31 # subroutine come last.
  32
  33 __all__ = ["what","whathdr"]
  34
  35 def what(filename):
  36     """Guess the type of a sound file"""
  37     res = whathdr(filename)
  38     return res
  39
  40
  41 def whathdr(filename):
  42     """Recognize sound headers"""
  43     f = open(filename, 'rb')
  44     h = f.read(512)
  45     for tf in tests:
  46         res = tf(h, f)
  47         if res:
  48             return res
  49     return None
  50
  51
  52 #-----------------------------------#
  53 # Subroutines per sound header type #
  54 #-----------------------------------#
  55
  56 tests = []
  57
  58 def test_aifc(h, f):
  59     import aifc
  60     if h[:4] != 'FORM':
  61         return None
  62     if h[8:12] == 'AIFC':
  63         fmt = 'aifc'
  64     elif h[8:12] == 'AIFF':
  65         fmt = 'aiff'
  66     else:
  67         return None
  68     f.seek(0)
  69     try:
  70         a = aifc.openfp(f, 'r')
  71     except (EOFError, aifc.Error):
  72         return None
  73     return (fmt, a.getframerate(), a.getnchannels(), \
  74             a.getnframes(), 8*a.getsampwidth())
  75
  76 tests.append(test_aifc)
  77
  78
  79 def test_au(h, f):
  80     if h[:4] == '.snd':
  81         f = get_long_be
  82     elif h[:4] in ('\0ds.', 'dns.'):
  83         f = get_long_le
  84     else:
  85         return None
  86     type = 'au'
  87     hdr_size = f(h[4:8])
  88     data_size = f(h[8:12])
  89     encoding = f(h[12:16])
  90     rate = f(h[16:20])
  91     nchannels = f(h[20:24])
  92     sample_size = 1 # default
  93     if encoding == 1:
  94         sample_bits = 'U'
  95     elif encoding == 2:
  96         sample_bits = 8
  97     elif encoding == 3:
  98         sample_bits = 16
  99         sample_size = 2
 100     else:
 101         sample_bits = '?'
 102     frame_size = sample_size * nchannels
 103     return type, rate, nchannels, data_size/frame_size, sample_bits
 104
 105 tests.append(test_au)
 106
 107
 108 def test_hcom(h, f):
 109     if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
 110         return None
 111     divisor = get_long_be(h[128+16:128+20])
 112     return 'hcom', 22050/divisor, 1, -1, 8
 113
 114 tests.append(test_hcom)
 115
 116
 117 def test_voc(h, f):
 118     if h[:20] != 'Creative Voice File\032':
 119         return None
 120     sbseek = get_short_le(h[20:22])
 121     rate = 0
 122     if 0 <= sbseek < 500 and h[sbseek] == '\1':
 123         ratecode = ord(h[sbseek+4])
 124         rate = int(1000000.0 / (256 - ratecode))
 125     return 'voc', rate, 1, -1, 8
 126
 127 tests.append(test_voc)
 128
 129
 130 def test_wav(h, f):
 131     # 'RIFF' <len> 'WAVE' 'fmt ' <len>
 132     if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
 133         return None
 134     style = get_short_le(h[20:22])
 135     nchannels = get_short_le(h[22:24])
 136     rate = get_long_le(h[24:28])
 137     sample_bits = get_short_le(h[34:36])
 138     return 'wav', rate, nchannels, -1, sample_bits
 139
 140 tests.append(test_wav)
 141
 142
 143 def test_8svx(h, f):
 144     if h[:4] != 'FORM' or h[8:12] != '8SVX':
 145         return None
 146     # Should decode it to get #channels -- assume always 1
 147     return '8svx', 0, 1, 0, 8
 148
 149 tests.append(test_8svx)
 150
 151
 152 def test_sndt(h, f):
 153     if h[:5] == 'SOUND':
 154         nsamples = get_long_le(h[8:12])
 155         rate = get_short_le(h[20:22])
 156         return 'sndt', rate, 1, nsamples, 8
 157
 158 tests.append(test_sndt)
 159
 160
 161 def test_sndr(h, f):
 162     if h[:2] == '\0\0':
 163         rate = get_short_le(h[2:4])
 164         if 4000 <= rate <= 25000:
 165             return 'sndr', rate, 1, -1, 8
 166
 167 tests.append(test_sndr)
 168
 169
 170 #---------------------------------------------#
 171 # Subroutines to extract numbers from strings #
 172 #---------------------------------------------#
 173
 174 def get_long_be(s):
 175     return (ord(s[0])<<24) | (ord(s[1])<<16) | (ord(s[2])<<8) | ord(s[3])
 176
 177 def get_long_le(s):
 178     return (ord(s[3])<<24) | (ord(s[2])<<16) | (ord(s[1])<<8) | ord(s[0])
 179
 180 def get_short_be(s):
 181     return (ord(s[0])<<8) | ord(s[1])
 182
 183 def get_short_le(s):
 184     return (ord(s[1])<<8) | ord(s[0])
 185
 186
 187 #--------------------#
 188 # Small test program #
 189 #--------------------#
 190
 191 def test():
 192     import sys
 193     recursive = 0
 194     if sys.argv[1:] and sys.argv[1] == '-r':
 195         del sys.argv[1:2]
 196         recursive = 1
 197     try:
 198         if sys.argv[1:]:
 199             testall(sys.argv[1:], recursive, 1)
 200         else:
 201             testall(['.'], recursive, 1)
 202     except KeyboardInterrupt:
 203         sys.stderr.write('\n[Interrupted]\n')
 204         sys.exit(1)
 205
 206 def testall(list, recursive, toplevel):
 207     import sys
 208     import os
 209     for filename in list:
 210         if os.path.isdir(filename):
 211             print filename + '/:',
 212             if recursive or toplevel:
 213                 print 'recursing down:'
 214                 import glob
 215                 names = glob.glob(os.path.join(filename, '*'))
 216                 testall(names, recursive, 0)
 217             else:
 218                 print '*** directory (use -r) ***'
 219         else:
 220             print filename + ':',
 221             sys.stdout.flush()
 222             try:
 223                 print what(filename)
 224             except IOError:
 225                 print '*** not found ***'
 226
 227 if __name__ == '__main__':
 228     test()