Fix replaygain for wma files which was broken since r29388.
[kugel-rb.git] / apps / metadata / asf.c
blobbcba3f0f79afcb34f21f673d8db5e49f9069d23e
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
9 * $Id$
11 * Copyright (C) 2007 Dave Chapman
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ****************************************************************************/
22 #include <stdio.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <ctype.h>
26 #include <inttypes.h>
28 #include "metadata.h"
29 #include "replaygain.h"
30 #include "debug.h"
31 #include "rbunicode.h"
32 #include "metadata_common.h"
33 #include "metadata_parsers.h"
34 #include "system.h"
35 #include <codecs/libasf/asf.h>
/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */

/* A GUID as stored in an ASF file, split into the four fields that
   asf_readGUID() fills in and asf_guid_match() compares. */
typedef struct guid_s {
    uint32_t v1;      /* first 4 bytes, read as a little-endian 32-bit value */
    uint16_t v2;      /* next 2 bytes, read as a little-endian 16-bit value */
    uint16_t v3;      /* next 2 bytes, read as a little-endian 16-bit value */
    uint8_t  v4[8];   /* final 8 bytes, kept as raw bytes */
} guid_t;
46 struct asf_object_s {
47 guid_t guid;
48 uint64_t size;
49 uint64_t datalen;
51 typedef struct asf_object_s asf_object_t;
53 static const guid_t asf_guid_null =
54 {0x00000000, 0x0000, 0x0000, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
56 /* top level object guids */
58 static const guid_t asf_guid_header =
59 {0x75B22630, 0x668E, 0x11CF, {0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C}};
61 static const guid_t asf_guid_data =
62 {0x75B22636, 0x668E, 0x11CF, {0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C}};
64 static const guid_t asf_guid_index =
65 {0x33000890, 0xE5B1, 0x11CF, {0x89, 0xF4, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xCB}};
67 /* header level object guids */
69 static const guid_t asf_guid_file_properties =
70 {0x8cabdca1, 0xa947, 0x11cf, {0x8E, 0xe4, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65}};
72 static const guid_t asf_guid_stream_properties =
73 {0xB7DC0791, 0xA9B7, 0x11CF, {0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65}};
75 static const guid_t asf_guid_content_description =
76 {0x75B22633, 0x668E, 0x11CF, {0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C}};
78 static const guid_t asf_guid_extended_content_description =
79 {0xD2D0A440, 0xE307, 0x11D2, {0x97, 0xF0, 0x00, 0xA0, 0xC9, 0x5E, 0xA8, 0x50}};
81 static const guid_t asf_guid_content_encryption =
82 {0x2211b3fb, 0xbd23, 0x11d2, {0xb4, 0xb7, 0x00, 0xa0, 0xc9, 0x55, 0xfc, 0x6e}};
84 static const guid_t asf_guid_extended_content_encryption =
85 {0x298ae614, 0x2622, 0x4c17, {0xb9, 0x35, 0xda, 0xe0, 0x7e, 0xe9, 0x28, 0x9c}};
87 /* stream type guids */
89 static const guid_t asf_guid_stream_type_audio =
90 {0xF8699E40, 0x5B4D, 0x11CF, {0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B}};
92 static int asf_guid_match(const guid_t *guid1, const guid_t *guid2)
94 if((guid1->v1 != guid2->v1) ||
95 (guid1->v2 != guid2->v2) ||
96 (guid1->v3 != guid2->v3) ||
97 (memcmp(guid1->v4, guid2->v4, 8))) {
98 return 0;
101 return 1;
104 /* Read the 16 byte GUID from a file */
105 static void asf_readGUID(int fd, guid_t* guid)
107 read_uint32le(fd, &guid->v1);
108 read_uint16le(fd, &guid->v2);
109 read_uint16le(fd, &guid->v3);
110 read(fd, guid->v4, 8);
113 static void asf_read_object_header(asf_object_t *obj, int fd)
115 asf_readGUID(fd, &obj->guid);
116 read_uint64le(fd, &obj->size);
117 obj->datalen = 0;
/* Parse an integer from the extended content object - we always
   convert to an int, regardless of native format.

   fd     - file positioned at the first byte of the value
   type   - descriptor value type: 2 (BOOL) and 3 (DWORD) are stored as
            32 bits, 4 (QWORD) as 64 bits and 5 (WORD) as 16 bits in the
            Extended Content Description object
   length - number of bytes the value occupies in the file

   The file position always ends up `length` bytes past where it started,
   so the caller can carry on with the next descriptor. Returns 0 for a
   type that cannot be decoded or when `length` is too small to hold a
   value of that type.
*/
static int asf_intdecode(int fd, int type, int length)
{
    uint16_t tmp16 = 0;
    uint32_t tmp32 = 0;
    uint64_t tmp64 = 0;
    int width;
    int value;

    switch (type) {
    case 2: /* callers hand over types 2..5, so 2 must be consumed too */
    case 3:
        width = 4;
        break;
    case 4:
        width = 8;
        break;
    case 5:
        width = 2;
        break;
    default:
        width = 0;
        break;
    }

    if (width == 0 || length < width) {
        /* Nothing we can decode - just step over the value. */
        lseek(fd, length, SEEK_CUR);
        return 0;
    }

    if (width == 4) {
        read_uint32le(fd, &tmp32);
        value = (int)tmp32;
    } else if (width == 8) {
        read_uint64le(fd, &tmp64);
        value = (int)tmp64;
    } else {
        read_uint16le(fd, &tmp16);
        value = (int)tmp16;
    }

    /* Skip whatever is left of the value beyond the integer itself. */
    lseek(fd, length - width, SEEK_CUR);
    return value;
}
/* Decode a LE utf16 string from a disk buffer into a fixed-sized
   utf8 buffer.

   fd         - file positioned at the start of the utf16 data
   utf16bytes - number of bytes of utf16 data stored in the file
   utf8       - in/out: write position in the destination buffer; advanced
                past every encoded character (but not past the terminator
                written at the end)
   utf8bytes  - in/out: space left in the destination buffer; reduced by
                the bytes written, including the terminator

   Characters are only encoded while more than 6 bytes remain in the
   destination; the rest of the input is read and discarded. Any utf16
   bytes that were not read are skipped with lseek(), so the file position
   ends up after the string.
*/
static void asf_utf16LEdecode(int fd,
                              uint16_t utf16bytes,
                              unsigned char **utf8,
                              int* utf8bytes
                             )
{
    unsigned long ucs;
    int n;
    unsigned char utf16buf[256];
    unsigned char* utf16 = utf16buf;
    unsigned char* newutf8;

    n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
    if (n < 0) {
        /* A failed read must not be subtracted from the unsigned count. */
        n = 0;
    }
    utf16bytes -= n;

    /* A code unit needs two bytes; a dangling odd byte is dropped rather
       than being paired with stale buffer contents. */
    while (n >= 2) {
        /* Check for a surrogate pair */
        if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
            if (n < 4) {
                /* Run out of utf16 bytes, read some more */
                utf16buf[0] = utf16[0];
                utf16buf[1] = utf16[1];

                n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes));
                if (n < 0) {
                    n = 0;
                }
                utf16 = utf16buf;
                utf16bytes -= n;
                n += 2;
            }

            if (n < 4) {
                /* Truncated utf16 string, abort */
                break;
            }

            ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
                             | utf16[2] | ((utf16[3] - 0xDC) << 8));
            utf16 += 4;
            n -= 4;
        } else {
            ucs = (utf16[0] | (utf16[1] << 8));
            utf16 += 2;
            n -= 2;
        }

        /* Only encode while there is room left for a full character
           plus the terminator. */
        if (*utf8bytes > 6) {
            newutf8 = utf8encode(ucs, *utf8);
            *utf8bytes -= (newutf8 - *utf8);
            *utf8 += (newutf8 - *utf8);
        }

        /* We have run out of utf16 bytes, read more if available */
        if ((n == 0) && (utf16bytes > 0)) {
            n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
            if (n < 0) {
                n = 0;
            }
            utf16 = utf16buf;
            utf16bytes -= n;
        }
    }

    /* Terminate the output; the write position is left on the terminator. */
    **utf8 = 0;
    --*utf8bytes;

    if (utf16bytes > 0) {
        /* Skip any remaining bytes */
        lseek(fd, utf16bytes, SEEK_CUR);
    }
    return;
}
217 static int asf_parse_header(int fd, struct mp3entry* id3,
218 asf_waveformatex_t* wfx)
220 asf_object_t current;
221 asf_object_t header;
222 uint64_t datalen;
223 int i;
224 int fileprop = 0;
225 uint64_t play_duration;
226 uint16_t flags;
227 uint32_t subobjects;
228 uint8_t utf8buf[512];
229 int id3buf_remaining = sizeof(id3->id3v2buf) + sizeof(id3->id3v1buf);
230 unsigned char* id3buf = (unsigned char*)id3->id3v2buf;
232 asf_read_object_header((asf_object_t *) &header, fd);
234 //DEBUGF("header.size=%d\n",(int)header.size);
235 if (header.size < 30) {
236 /* invalid size for header object */
237 return ASF_ERROR_OBJECT_SIZE;
240 read_uint32le(fd, &subobjects);
242 /* Two reserved bytes - do we need to read them? */
243 lseek(fd, 2, SEEK_CUR);
245 //DEBUGF("Read header - size=%d, subobjects=%d\n",(int)header.size, (int)subobjects);
247 if (subobjects > 0) {
248 header.datalen = header.size - 30;
250 /* TODO: Check that we have datalen bytes left in the file */
251 datalen = header.datalen;
253 for (i=0; i<(int)subobjects; i++) {
254 //DEBUGF("Parsing header object %d - datalen=%d\n",i,(int)datalen);
255 if (datalen < 24) {
256 //DEBUGF("not enough data for reading object\n");
257 break;
260 asf_read_object_header(&current, fd);
262 if (current.size > datalen || current.size < 24) {
263 //DEBUGF("invalid object size - current.size=%d, datalen=%d\n",(int)current.size,(int)datalen);
264 break;
267 if (asf_guid_match(&current.guid, &asf_guid_file_properties)) {
268 if (current.size < 104)
269 return ASF_ERROR_OBJECT_SIZE;
271 if (fileprop) {
272 /* multiple file properties objects not allowed */
273 return ASF_ERROR_INVALID_OBJECT;
276 fileprop = 1;
278 /* Get the number of logical packets - uint16_t at offset 31
279 * (Big endian byte order) */
280 lseek(fd, 31, SEEK_CUR);
281 read_uint16be(fd, &wfx->numpackets);
283 /* Now get the play duration - uint64_t at offset 40 */
284 lseek(fd, 7, SEEK_CUR);
285 read_uint64le(fd, &play_duration);
286 id3->length = play_duration / 10000;
288 //DEBUGF("****** length = %lums\n", id3->length);
290 /* Read the packet size - uint32_t at offset 68 */
291 lseek(fd, 20, SEEK_CUR);
292 read_uint32le(fd, &wfx->packet_size);
294 /* Skip bytes remaining in object */
295 lseek(fd, current.size - 24 - 72, SEEK_CUR);
296 } else if (asf_guid_match(&current.guid, &asf_guid_stream_properties)) {
297 guid_t guid;
298 uint32_t propdatalen;
300 if (current.size < 78)
301 return ASF_ERROR_OBJECT_SIZE;
303 #if 0
304 asf_byteio_getGUID(&guid, current->data);
305 datalen = asf_byteio_getDWLE(current->data + 40);
306 flags = asf_byteio_getWLE(current->data + 48);
307 #endif
309 asf_readGUID(fd, &guid);
311 lseek(fd, 24, SEEK_CUR);
312 read_uint32le(fd, &propdatalen);
313 lseek(fd, 4, SEEK_CUR);
314 read_uint16le(fd, &flags);
316 if (!asf_guid_match(&guid, &asf_guid_stream_type_audio)) {
317 //DEBUGF("Found stream properties for non audio stream, skipping\n");
318 lseek(fd,current.size - 24 - 50,SEEK_CUR);
319 } else if (wfx->audiostream == -1) {
320 lseek(fd, 4, SEEK_CUR);
321 //DEBUGF("Found stream properties for audio stream %d\n",flags&0x7f);
323 if (propdatalen < 18) {
324 return ASF_ERROR_INVALID_LENGTH;
327 #if 0
328 if (asf_byteio_getWLE(data + 16) > datalen - 16) {
329 return ASF_ERROR_INVALID_LENGTH;
331 #endif
332 read_uint16le(fd, &wfx->codec_id);
333 read_uint16le(fd, &wfx->channels);
334 read_uint32le(fd, &wfx->rate);
335 read_uint32le(fd, &wfx->bitrate);
336 wfx->bitrate *= 8;
337 read_uint16le(fd, &wfx->blockalign);
338 read_uint16le(fd, &wfx->bitspersample);
339 read_uint16le(fd, &wfx->datalen);
341 /* Round bitrate to the nearest kbit */
342 id3->bitrate = (wfx->bitrate + 500) / 1000;
343 id3->frequency = wfx->rate;
345 if (wfx->codec_id == ASF_CODEC_ID_WMAV1) {
346 read(fd, wfx->data, 4);
347 lseek(fd,current.size - 24 - 72 - 4,SEEK_CUR);
348 wfx->audiostream = flags&0x7f;
349 } else if (wfx->codec_id == ASF_CODEC_ID_WMAV2) {
350 read(fd, wfx->data, 6);
351 lseek(fd,current.size - 24 - 72 - 6,SEEK_CUR);
352 wfx->audiostream = flags&0x7f;
353 } else if (wfx->codec_id == ASF_CODEC_ID_WMAPRO) {
354 /* wma pro decoder needs the extra-data */
355 read(fd, wfx->data, wfx->datalen);
356 lseek(fd,current.size - 24 - 72 - wfx->datalen,SEEK_CUR);
357 wfx->audiostream = flags&0x7f;
358 /* Correct codectype to redirect playback to the proper .codec */
359 id3->codectype = AFMT_WMAPRO;
360 } else if (wfx->codec_id == ASF_CODEC_ID_WMAVOICE) {
361 read(fd, wfx->data, wfx->datalen);
362 lseek(fd,current.size - 24 - 72 - wfx->datalen,SEEK_CUR);
363 wfx->audiostream = flags&0x7f;
364 id3->codectype = AFMT_WMAVOICE;
365 } else {
366 DEBUGF("Unsupported WMA codec (Lossless, Voice, etc)\n");
367 lseek(fd,current.size - 24 - 72,SEEK_CUR);
371 } else if (asf_guid_match(&current.guid, &asf_guid_content_description)) {
372 /* Object contains five 16-bit string lengths, followed by the five strings:
373 title, artist, copyright, description, rating
375 uint16_t strlength[5];
376 int i;
378 //DEBUGF("Found GUID_CONTENT_DESCRIPTION - size=%d\n",(int)(current.size - 24));
380 /* Read the 5 string lengths - number of bytes included trailing zero */
381 for (i=0; i<5; i++) {
382 read_uint16le(fd, &strlength[i]);
383 //DEBUGF("strlength = %u\n",strlength[i]);
386 if (strlength[0] > 0) { /* 0 - Title */
387 id3->title = id3buf;
388 asf_utf16LEdecode(fd, strlength[0], &id3buf, &id3buf_remaining);
391 if (strlength[1] > 0) { /* 1 - Artist */
392 id3->artist = id3buf;
393 asf_utf16LEdecode(fd, strlength[1], &id3buf, &id3buf_remaining);
396 lseek(fd, strlength[2], SEEK_CUR); /* 2 - copyright */
398 if (strlength[3] > 0) { /* 3 - description */
399 id3->comment = id3buf;
400 asf_utf16LEdecode(fd, strlength[3], &id3buf, &id3buf_remaining);
403 lseek(fd, strlength[4], SEEK_CUR); /* 4 - rating */
404 } else if (asf_guid_match(&current.guid, &asf_guid_extended_content_description)) {
405 uint16_t count;
406 int i;
407 int bytesleft = current.size - 24;
408 //DEBUGF("Found GUID_EXTENDED_CONTENT_DESCRIPTION\n");
410 read_uint16le(fd, &count);
411 bytesleft -= 2;
412 //DEBUGF("extended metadata count = %u\n",count);
414 for (i=0; i < count; i++) {
415 uint16_t length, type;
416 unsigned char* utf8 = utf8buf;
417 int utf8length = 512;
419 read_uint16le(fd, &length);
420 asf_utf16LEdecode(fd, length, &utf8, &utf8length);
421 bytesleft -= 2 + length;
423 read_uint16le(fd, &type);
424 read_uint16le(fd, &length);
426 if (!strcmp("WM/TrackNumber",utf8buf)) {
427 if (type == 0) {
428 id3->track_string = id3buf;
429 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
430 id3->tracknum = atoi(id3->track_string);
431 } else if ((type >=2) && (type <= 5)) {
432 id3->tracknum = asf_intdecode(fd, type, length);
433 } else {
434 lseek(fd, length, SEEK_CUR);
436 } else if ((!strcmp("WM/Genre", utf8buf)) && (type == 0)) {
437 id3->genre_string = id3buf;
438 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
439 } else if ((!strcmp("WM/AlbumTitle", utf8buf)) && (type == 0)) {
440 id3->album = id3buf;
441 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
442 } else if ((!strcmp("WM/AlbumArtist", utf8buf)) && (type == 0)) {
443 id3->albumartist = id3buf;
444 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
445 } else if ((!strcmp("WM/Composer", utf8buf)) && (type == 0)) {
446 id3->composer = id3buf;
447 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
448 } else if (!strcmp("WM/Year", utf8buf)) {
449 if (type == 0) {
450 id3->year_string = id3buf;
451 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
452 id3->year = atoi(id3->year_string);
453 } else if ((type >=2) && (type <= 5)) {
454 id3->year = asf_intdecode(fd, type, length);
455 } else {
456 lseek(fd, length, SEEK_CUR);
458 } else if (!strncmp("replaygain_", utf8buf, 11)) {
459 char *value = id3buf;
460 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
461 parse_replaygain(utf8buf, value, id3);
462 } else if (!strcmp("MusicBrainz/Track Id", utf8buf)) {
463 id3->mb_track_id = id3buf;
464 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
465 } else {
466 lseek(fd, length, SEEK_CUR);
468 bytesleft -= 4 + length;
471 lseek(fd, bytesleft, SEEK_CUR);
472 } else if (asf_guid_match(&current.guid, &asf_guid_content_encryption)
473 || asf_guid_match(&current.guid, &asf_guid_extended_content_encryption)) {
474 //DEBUGF("File is encrypted\n");
475 return ASF_ERROR_ENCRYPTED;
476 } else {
477 //DEBUGF("Skipping %d bytes of object\n",(int)(current.size - 24));
478 lseek(fd,current.size - 24,SEEK_CUR);
481 //DEBUGF("Parsed object - size = %d\n",(int)current.size);
482 datalen -= current.size;
485 if (i != (int)subobjects || datalen != 0) {
486 //DEBUGF("header data doesn't match given subobject count\n");
487 return ASF_ERROR_INVALID_VALUE;
490 //DEBUGF("%d subobjects read successfully\n", i);
493 #if 0
494 tmp = asf_parse_header_validate(file, &header);
495 if (tmp < 0) {
496 /* header read ok but doesn't validate correctly */
497 return tmp;
499 #endif
501 //DEBUGF("header validated correctly\n");
503 return 0;
506 bool get_asf_metadata(int fd, struct mp3entry* id3)
508 int res;
509 asf_object_t obj;
510 asf_waveformatex_t wfx;
512 wfx.audiostream = -1;
514 res = asf_parse_header(fd, id3, &wfx);
516 if (res < 0) {
517 DEBUGF("ASF: parsing error - %d\n",res);
518 return false;
521 if (wfx.audiostream == -1) {
522 DEBUGF("ASF: No WMA streams found\n");
523 return false;
526 asf_read_object_header(&obj, fd);
528 if (!asf_guid_match(&obj.guid, &asf_guid_data)) {
529 DEBUGF("ASF: No data object found\n");
530 return false;
533 /* Store the current file position - no need to parse the header
534 again in the codec. The +26 skips the rest of the data object
535 header.
537 id3->first_frame_offset = lseek(fd, 0, SEEK_CUR) + 26;
538 id3->filesize = filesize(fd);
539 /* We copy the wfx struct to the MP3 TOC field in the id3 struct so
540 the codec doesn't need to parse the header object again */
541 memcpy(id3->toc, &wfx, sizeof(wfx));
543 return true;