Fix FS#11955. ID3 tag parser was broken since r29349. Trimming strings must be done...
[kugel-rb.git] / apps / metadata / id3tags.c
blobba6a158080f305d661e3f52f6a9f4b44b8b3a738
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2002 by Daniel Stenberg
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
/*
 * Parts of this code have been stolen from the Ample project and were written
 * by David Härdeman. It has since been extended and enhanced pretty much by
 * all sorts of friendly Rockbox people.
 */
/* tagResolver and associated code copyright 2003 Thomas Paul Diffenbach */
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <stdbool.h>
36 #include <stddef.h>
37 #include <ctype.h>
38 #include "string-extra.h"
39 #include "config.h"
40 #include "file.h"
41 #include "logf.h"
42 #include "system.h"
43 #include "replaygain.h"
44 #include "rbunicode.h"
46 #include "metadata.h"
47 #include "mp3data.h"
48 #if CONFIG_CODEC == SWCODEC
49 #include "metadata_common.h"
50 #endif
51 #include "metadata_parsers.h"
/* Decode a "synchsafe" integer: the low 7 bits of each of the four bytes are
 * concatenated, b0 being the most significant group. The top bit of every
 * byte is ignored.
 */
static unsigned long unsync(unsigned long b0,
                            unsigned long b1,
                            unsigned long b2,
                            unsigned long b3)
{
   return (((long)(b0 & 0x7F) << (3*7)) |
           ((long)(b1 & 0x7F) << (2*7)) |
           ((long)(b2 & 0x7F) << (1*7)) |
           ((long)(b3 & 0x7F) << (0*7)));
}
/* Genre names indexed by their numeric ID3 genre code. */
static const char* const genres[] = {
    "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", "Grunge",
    "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other", "Pop", "R&B",
    "Rap", "Reggae", "Rock", "Techno", "Industrial", "Alternative", "Ska",
    "Death Metal", "Pranks", "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
    "Vocal", "Jazz+Funk", "Fusion", "Trance", "Classical", "Instrumental",
    "Acid", "House", "Game", "Sound Clip", "Gospel", "Noise", "AlternRock",
    "Bass", "Soul", "Punk", "Space", "Meditative", "Instrumental Pop",
    "Instrumental Rock", "Ethnic", "Gothic", "Darkwave", "Techno-Industrial",
    "Electronic", "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy",
    "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", "Jungle",
    "Native American", "Cabaret", "New Wave", "Psychadelic", "Rave",
    "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz",
    "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",

    /* winamp extensions */
    "Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion", "Bebob",
    "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
    "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock",
    "Big Band", "Chorus", "Easy Listening", "Acoustic", "Humour", "Speech",
    "Chanson", "Opera", "Chamber Music", "Sonata", "Symphony", "Booty Bass",
    "Primus", "Porn Groove", "Satire", "Slow Jam", "Club", "Tango", "Samba",
    "Folklore", "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
    "Duet", "Punk Rock", "Drum Solo", "A capella", "Euro-House", "Dance Hall",
    "Goa", "Drum & Bass", "Club-House", "Hardcore", "Terror", "Indie",
    "BritPop", "Negerpunk", "Polsk Punk", "Beat", "Christian Gangsta Rap",
    "Heavy Metal", "Black Metal", "Crossover", "Contemporary Christian",
    "Christian Rock", "Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop",
    "Synthpop"
};

/* Map a numeric genre code to its name.
 *
 * Returns a pointer into the static genre table, or NULL when genre_num is
 * outside the table. The returned string must not be modified.
 */
char* id3_get_num_genre(unsigned int genre_num)
{
    if (genre_num < sizeof(genres) / sizeof(genres[0]))
        return (char*)genres[genre_num];
    return NULL;
}
/*
    HOW TO ADD ADDITIONAL ID3 VERSION 2 TAGS
    Code and comments by Thomas Paul Diffenbach

    To add another ID3v2 Tag, do the following:
    1.  add a char* named for the tag to struct mp3entry in id3.h,
        (I (tpd) prefer to use char* rather than ints, even for what seems like
        numerical values, for cases where a number won't do, e.g.,
        YEAR: "circa 1765", "1790/1977" (composed/performed), "28 Feb 1969"
        TRACK: "1/12", "1 of 12", GENRE: "Freeform genre name"
        Text is more flexible, and as the main use of id3 data is to
        display it, converting it to an int just means reconverting to
        display it, at a runtime cost.)

    2. If any special processing beyond copying the tag value from the Id3
       block to the struct mp3entry is required (such as converting to an
       int), write a function to perform this special processing.

       This function's prototype must match that of
       typedef tagPostProcessFunc, that is it must be:
         int func( struct mp3entry*, char* tag, int bufferpos )
       The first argument is a pointer to the current mp3entry structure,
       the second argument is a pointer to the null terminated string value
       of the tag found, and the third argument is the offset of the next
       free byte in the mp3entry's buffer. Your function should return the
       corrected offset; if you don't lengthen or shorten the tag string,
       you can return the third argument unchanged.

       Unless you have a good reason not to, make the function static.
       TO JUST COPY THE TAG NO SPECIAL PROCESSING FUNCTION IS NEEDED.

    3. add one or more entries to the tagList array, using the format:
            char* ID3 Tag symbolic name -- see the ID3 specification for these,
            sizeof() that name minus 1,
            offsetof( struct mp3entry, variable_name_in_struct_mp3entry ),
            pointer to your special processing function or NULL
                if you need no special processing
            flag indicating if this tag is binary or textual
        Many ID3 symbolic names come in more than one form. You can add both
        forms, each referencing the same variable in struct mp3entry.
        If both forms are present, the last found will be used.
        Note that the offset can be zero, in which case no entry will be set
        in the mp3entry struct; the frame is still read into the buffer and
        the special processing function is called (several times, if there
        are several frames with the same name).

    4. Alternately, use the TAG_LIST_ENTRY macro with
         ID3 tag symbolic name,
         variable in struct mp3entry,
         special processing function address

    5.  Add code to wps-display.c function get_tag to assign a printf-like
        format specifier for the tag */
/* Structure for ID3 Tag extraction information */
struct tag_resolver {
    const char* tag;    /* frame identifier, e.g. "TIT2" or "TT2" */
    int tag_length;     /* number of characters in the identifier */
    size_t offset;      /* offsetof() the destination char* in struct
                           mp3entry, or 0 when nothing is stored directly */
    /* optional post-processing hook; returns the new buffer position */
    int (*ppFunc)(struct mp3entry*, char* tag, int bufferpos);
    bool binary;        /* true: frame content is not text, do not decode */
};
/* State shared by the streaming unsynch readers: true when the last byte
   they processed was 0xff, so a pattern split across two reads is still
   recognised. */
static bool global_ff_found;

/* Reverse ID3v2 unsynchronisation in place.
 *
 * Every 0x00 byte that directly follows a 0xff byte is dropped; all other
 * bytes are kept in order.
 *
 *  tag      - buffer to decode in place
 *  len      - number of encoded bytes in the buffer
 *  ff_found - in: whether the byte preceding tag[0] was 0xff;
 *             out: whether the last byte processed was a 0xff that has not
 *             yet been followed by another byte
 *
 * Returns the number of decoded bytes left at the start of the buffer.
 */
static int unsynchronize(char* tag, int len, bool *ff_found)
{
    unsigned char *rp = (unsigned char *)tag;
    unsigned char *wp = rp;
    int i;

    for (i = 0; i < len; i++) {
        /* Read the next byte and write it back, but don't increment the
           write pointer yet */
        unsigned char c = *rp++;
        *wp = c;

        if (*ff_found) {
            /* Keep the byte only if it isn't the inserted zero */
            if (c != 0)
                wp++;
            *ff_found = false;
        } else {
            if (c == 0xff)
                *ff_found = true;
            wp++;
        }
    }

    return (int)(wp - (unsigned char *)tag);
}
/* Reverse unsynchronisation on one complete frame held in memory.
 * The decoder starts with no pending 0xff. Returns the decoded length.
 */
static int unsynchronize_frame(char* tag, int len)
{
    bool ff_found = false;

    return unsynchronize(tag, len, &ff_found);
}
202 static int read_unsynched(int fd, void *buf, int len)
204 int i;
205 int rc;
206 int remaining = len;
207 char *wp;
208 char *rp;
210 wp = buf;
212 while(remaining) {
213 rp = wp;
214 rc = read(fd, rp, remaining);
215 if(rc <= 0)
216 return rc;
218 i = unsynchronize(wp, remaining, &global_ff_found);
219 remaining -= i;
220 wp += i;
223 return len;
226 static int skip_unsynched(int fd, int len)
228 int rc;
229 int remaining = len;
230 int rlen;
231 char buf[32];
233 while(remaining) {
234 rlen = MIN(sizeof(buf), (unsigned int)remaining);
235 rc = read(fd, buf, rlen);
236 if(rc <= 0)
237 return rc;
239 remaining -= unsynchronize(buf, rlen, &global_ff_found);
242 return len;
245 /* parse numeric value from string */
246 static int parsetracknum( struct mp3entry* entry, char* tag, int bufferpos )
248 entry->tracknum = atoi( tag );
249 return bufferpos;
252 /* parse numeric value from string */
253 static int parsediscnum( struct mp3entry* entry, char* tag, int bufferpos )
255 entry->discnum = atoi( tag );
256 return bufferpos;
259 /* parse numeric value from string */
260 static int parseyearnum( struct mp3entry* entry, char* tag, int bufferpos )
262 entry->year = atoi( tag );
263 return bufferpos;
266 /* parse numeric genre from string, version 2.2 and 2.3 */
267 static int parsegenre( struct mp3entry* entry, char* tag, int bufferpos )
269 /* Use bufferpos to hold current position in entry->id3v2buf. */
270 bufferpos = tag - entry->id3v2buf;
272 if(entry->id3version >= ID3_VER_2_4) {
273 /* In version 2.4 and up, there are no parentheses, and the genre frame
274 is a list of strings, either numbers or text. */
276 /* Is it a number? */
277 if(isdigit(tag[0])) {
278 entry->genre_string = id3_get_num_genre(atoi( tag ));
279 return bufferpos;
280 } else {
281 entry->genre_string = tag;
282 return bufferpos + strlen(tag) + 1;
284 } else {
285 if( tag[0] == '(' && tag[1] != '(' ) {
286 entry->genre_string = id3_get_num_genre(atoi( tag + 1 ));
287 return bufferpos;
289 else {
290 entry->genre_string = tag;
291 return bufferpos + strlen(tag) + 1;
#ifdef HAVE_ALBUMART
/* Post-processing hook for embedded album art frames.
 *
 * Determines the image type from the frame and, when it is JPEG or PNG,
 * advances entry->albumart.pos and shrinks entry->albumart.size by the
 * length of the fields in front of the image data. Nothing is kept in
 * id3v2buf, so bufferpos is returned unchanged.
 */
static int parsealbumart( struct mp3entry* entry, char* tag, int bufferpos )
{
    entry->embed_albumart = false;

    /* we currently don't support unsynchronizing albumart */
    if (entry->albumart.type == AA_TYPE_UNSYNC)
        return bufferpos;

    entry->albumart.type = AA_TYPE_UNKNOWN;

    char *start = tag;
    /* skip text encoding */
    tag += 1;

    if (memcmp(tag, "image/", 6) == 0)
    {
        /* ID3 v2.3+ */
        tag += 6;
        if (strcmp(tag, "jpeg") == 0)
        {
            entry->albumart.type = AA_TYPE_JPG;
            tag += 5;
        }
        else if (strcmp(tag, "png") == 0)
        {
            entry->albumart.type = AA_TYPE_PNG;
            tag += 4;
        }
    }
    else
    {
        /* ID3 v2.2 */
        if (memcmp(tag, "JPG", 3) == 0)
            entry->albumart.type = AA_TYPE_JPG;
        else if (memcmp(tag, "PNG", 3) == 0)
            entry->albumart.type = AA_TYPE_PNG;
        tag += 3;
    }

    if (entry->albumart.type != AA_TYPE_UNKNOWN)
    {
        /* skip picture type */
        tag += 1;
        /* skip description */
        tag = strchr(tag, '\0') + 1;
        /* fixup offset&size for image data */
        entry->albumart.pos  += tag - start;
        entry->albumart.size -= tag - start;
        entry->embed_albumart = true;
    }
    /* return bufferpos as we didn't store anything in id3v2buf */
    return bufferpos;
}
#endif
353 /* parse user defined text, looking for album artist and replaygain
354 * information.
356 static int parseuser( struct mp3entry* entry, char* tag, int bufferpos )
358 char* value = NULL;
359 int desc_len = strlen(tag);
360 int value_len = 0;
362 if ((tag - entry->id3v2buf + desc_len + 2) < bufferpos) {
363 /* At least part of the value was read, so we can safely try to
364 * parse it */
365 value = tag + desc_len + 1;
366 value_len = strlen(value) + 1;
368 if (!strcasecmp(tag, "ALBUM ARTIST")) {
369 strlcpy(tag, value, value_len);
370 entry->albumartist = tag;
371 #if CONFIG_CODEC == SWCODEC
372 } else {
373 value_len = parse_replaygain(tag, value, entry, tag, value_len);
374 #else
375 value_len = 0;
376 #endif
380 return tag - entry->id3v2buf + value_len;
383 #if CONFIG_CODEC == SWCODEC
384 /* parse RVA2 binary data and convert to replaygain information. */
385 static int parserva2( struct mp3entry* entry, char* tag, int bufferpos )
387 int desc_len = strlen(tag);
388 int start_pos = tag - entry->id3v2buf;
389 int end_pos = start_pos + desc_len + 5;
390 int value_len = 0;
391 unsigned char* value = tag + desc_len + 1;
393 /* Only parse RVA2 replaygain tags if tag version == 2.4 and channel
394 * type is master volume.
396 if (entry->id3version == ID3_VER_2_4 && end_pos < bufferpos
397 && *value++ == 1) {
398 long gain = 0;
399 long peak = 0;
400 long peakbits;
401 long peakbytes;
402 bool album = false;
404 /* The RVA2 specification is unclear on some things (id string and
405 * peak volume), but this matches how Quod Libet use them.
408 gain = (int16_t) ((value[0] << 8) | value[1]);
409 value += 2;
410 peakbits = *value++;
411 peakbytes = (peakbits + 7) / 8;
413 /* Only use the topmost 24 bits for peak volume */
414 if (peakbytes > 3) {
415 peakbytes = 3;
418 /* Make sure the peak bits were read */
419 if (end_pos + peakbytes < bufferpos) {
420 long shift = ((8 - (peakbits & 7)) & 7) + (3 - peakbytes) * 8;
422 for ( ; peakbytes; peakbytes--) {
423 peak <<= 8;
424 peak += *value++;
427 peak <<= shift;
429 if (peakbits > 24) {
430 peak += *value >> (8 - shift);
434 if (strcasecmp(tag, "album") == 0) {
435 album = true;
436 } else if (strcasecmp(tag, "track") != 0) {
437 /* Only accept non-track values if we don't have any previous
438 * value.
440 if (entry->track_gain != 0) {
441 return start_pos;
445 value_len = parse_replaygain_int(album, gain, peak * 2, entry,
446 tag, sizeof(entry->id3v2buf) - start_pos);
449 return start_pos + value_len;
451 #endif
453 static int parsembtid( struct mp3entry* entry, char* tag, int bufferpos )
455 char* value = NULL;
456 int desc_len = strlen(tag);
457 /*DEBUGF("MBID len: %d\n", desc_len);*/
458 /* Musicbrainz track IDs are always 36 chars long */
459 const size_t mbtid_len = 36;
461 if ((tag - entry->id3v2buf + desc_len + 2) < bufferpos)
463 value = tag + desc_len + 1;
465 if (strcasecmp(tag, "http://musicbrainz.org") == 0)
467 if (mbtid_len == strlen(value))
469 entry->mb_track_id = value;
470 return bufferpos + mbtid_len + 1;
475 return bufferpos;
478 static const struct tag_resolver taglist[] = {
479 { "TPE1", 4, offsetof(struct mp3entry, artist), NULL, false },
480 { "TP1", 3, offsetof(struct mp3entry, artist), NULL, false },
481 { "TIT2", 4, offsetof(struct mp3entry, title), NULL, false },
482 { "TT2", 3, offsetof(struct mp3entry, title), NULL, false },
483 { "TALB", 4, offsetof(struct mp3entry, album), NULL, false },
484 { "TAL", 3, offsetof(struct mp3entry, album), NULL, false },
485 { "TRK", 3, offsetof(struct mp3entry, track_string), &parsetracknum, false },
486 { "TPOS", 4, offsetof(struct mp3entry, disc_string), &parsediscnum, false },
487 { "TPA", 3, offsetof(struct mp3entry, disc_string), &parsediscnum, false },
488 { "TRCK", 4, offsetof(struct mp3entry, track_string), &parsetracknum, false },
489 { "TDRC", 4, offsetof(struct mp3entry, year_string), &parseyearnum, false },
490 { "TYER", 4, offsetof(struct mp3entry, year_string), &parseyearnum, false },
491 { "TYE", 3, offsetof(struct mp3entry, year_string), &parseyearnum, false },
492 { "TCOM", 4, offsetof(struct mp3entry, composer), NULL, false },
493 { "TCM", 3, offsetof(struct mp3entry, composer), NULL, false },
494 { "TPE2", 4, offsetof(struct mp3entry, albumartist), NULL, false },
495 { "TP2", 3, offsetof(struct mp3entry, albumartist), NULL, false },
496 { "TIT1", 4, offsetof(struct mp3entry, grouping), NULL, false },
497 { "TT1", 3, offsetof(struct mp3entry, grouping), NULL, false },
498 { "COMM", 4, offsetof(struct mp3entry, comment), NULL, false },
499 { "COM", 3, offsetof(struct mp3entry, comment), NULL, false },
500 { "TCON", 4, offsetof(struct mp3entry, genre_string), &parsegenre, false },
501 { "TCO", 3, offsetof(struct mp3entry, genre_string), &parsegenre, false },
502 #ifdef HAVE_ALBUMART
503 { "APIC", 4, 0, &parsealbumart, true },
504 { "PIC", 3, 0, &parsealbumart, true },
505 #endif
506 { "TXXX", 4, 0, &parseuser, false },
507 #if CONFIG_CODEC == SWCODEC
508 { "RVA2", 4, 0, &parserva2, true },
509 #endif
510 { "UFID", 4, 0, &parsembtid, false },
513 #define TAGLIST_SIZE ((int)ARRAYLEN(taglist))
/* Get the length of an ID3 string in the given encoding. Returns the length
 * in bytes, including the end nil. Encodings 0x01 and 0x02 are scanned as
 * byte pairs and end at the first all-zero pair; every other encoding value
 * is measured with strlen().
 */
static int unicode_len(char encoding, const void* string)
{
    int len = 0;

    if (encoding == 0x01 || encoding == 0x02) {
        char first;
        const char *s = string;
        /* string might be unaligned, so using short* can crash on ARM and SH1 */
        do {
            first = *s++;
        } while ((first | *s++) != 0);

        len = s - (const char*) string;
    } else {
        len = strlen((char*) string) + 1;
    }

    return len;
}
/* Convert the text of an ID3v2 frame to UTF-8.
 *
 * string[0] is the ID3 text encoding byte: 0x00 and unknown values are
 * converted with iso_decode(), 0x01/0x02 are decoded as UTF-16 (one or more
 * nil-separated strings), 0x03 is copied as UTF-8.
 *
 *  string  - frame content, starting with the encoding byte
 *  utf8buf - receives the converted text
 *  len     - in: number of bytes in string; out: length written to utf8buf
 *
 * Always returns 0.
 */
static int unicode_munge(char* string, char* utf8buf, int *len) {
    long tmp;
    bool le = false;
    int i = 0;
    unsigned char *str = (unsigned char *)string;
    int templen = 0;
    unsigned char* utf8 = (unsigned char *)utf8buf;

    switch (str[0]) {
        case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
            str++;
            (*len)--;
            utf8 = iso_decode(str, utf8, -1, *len);
            *utf8 = 0;
            *len = (unsigned long)utf8 - (unsigned long)utf8buf;
            break;

        case 0x01: /* Unicode with or without BOM */
        case 0x02:
            (*len)--;
            str++;

            /* Handle frames with more than one string
               (needed for TXXX frames).*/
            do {
                tmp = bytes2int(0, 0, str[0], str[1]);

                /* Now check if there is a BOM
                   (zero-width non-breaking space, 0xfeff)
                   and if it is in little or big endian format */
                if(tmp == 0xfffe) { /* Little endian? */
                    le = true;
                    str += 2;
                    (*len)-=2;
                } else if(tmp == 0xfeff) { /* Big endian? */
                    str += 2;
                    (*len)-=2;
                } else
                /* If there is no BOM (which is a specification violation),
                   let's try to guess it. If one of the bytes is 0x00, it is
                   probably the most significant one. */
                    if(str[1] == 0)
                        le = true;

                do {
                    if(le)
                        utf8 = utf16LEdecode(str, utf8, 1);
                    else
                        utf8 = utf16BEdecode(str, utf8, 1);

                    str+=2;
                    i += 2;
                } while((str[0] || str[1]) && (i < *len));

                *utf8++ = 0; /* Terminate the string */
                templen += (strlen(&utf8buf[templen]) + 1);
                str += 2;
                i+=2;
            } while(i < *len);
            *len = templen - 1;
            break;

        case 0x03: /* UTF-8 encoded string */
            /* Drop the encoding byte first so that only the text bytes are
               copied and nothing past the frame data is read. */
            (*len)--;
            for(i=0; i < *len; i++)
                utf8[i] = str[i+1];
            break;

        default: /* Plain old string */
            utf8 = iso_decode(str, utf8, -1, *len);
            *utf8 = 0;
            *len = (unsigned long)utf8 - (unsigned long)utf8buf;
            break;
    }
    return 0;
}
619 * Sets the title of an MP3 entry based on its ID3v1 tag.
621 * Arguments: file - the MP3 file to scen for a ID3v1 tag
622 * entry - the entry to set the title in
624 * Returns: true if a title was found and created, else false
626 bool setid3v1title(int fd, struct mp3entry *entry)
628 unsigned char buffer[128];
629 static const char offsets[] = {3, 33, 63, 97, 93, 125, 127};
630 int i, j;
631 unsigned char* utf8;
633 if (-1 == lseek(fd, -128, SEEK_END))
634 return false;
636 if (read(fd, buffer, sizeof buffer) != sizeof buffer)
637 return false;
639 if (strncmp((char *)buffer, "TAG", 3))
640 return false;
642 entry->id3v1len = 128;
643 entry->id3version = ID3_VER_1_0;
645 for (i=0; i < (int)sizeof offsets; i++) {
646 unsigned char* ptr = (unsigned char *)buffer + offsets[i];
648 switch(i) {
649 case 0:
650 case 1:
651 case 2:
652 /* kill trailing space in strings */
653 for (j=29; j && (ptr[j]==0 || ptr[j]==' '); j--)
654 ptr[j] = 0;
655 /* convert string to utf8 */
656 utf8 = (unsigned char *)entry->id3v1buf[i];
657 utf8 = iso_decode(ptr, utf8, -1, 30);
658 /* make sure string is terminated */
659 *utf8 = 0;
660 break;
662 case 3:
663 /* kill trailing space in strings */
664 for (j=27; j && (ptr[j]==0 || ptr[j]==' '); j--)
665 ptr[j] = 0;
666 /* convert string to utf8 */
667 utf8 = (unsigned char *)entry->id3v1buf[3];
668 utf8 = iso_decode(ptr, utf8, -1, 28);
669 /* make sure string is terminated */
670 *utf8 = 0;
671 break;
673 case 4:
674 ptr[4] = 0;
675 entry->year = atoi((char *)ptr);
676 break;
678 case 5:
679 /* id3v1.1 uses last two bytes of comment field for track
680 number: first must be 0 and second is track num */
681 if (!ptr[0] && ptr[1]) {
682 entry->tracknum = ptr[1];
683 entry->id3version = ID3_VER_1_1;
685 break;
687 case 6:
688 /* genre */
689 entry->genre_string = id3_get_num_genre(ptr[0]);
690 break;
694 entry->title = entry->id3v1buf[0];
695 entry->artist = entry->id3v1buf[1];
696 entry->album = entry->id3v1buf[2];
697 entry->comment = entry->id3v1buf[3];
699 return true;
704 * Sets the title of an MP3 entry based on its ID3v2 tag.
706 * Arguments: file - the MP3 file to scan for a ID3v2 tag
707 * entry - the entry to set the title in
709 * Returns: true if a title was found and created, else false
711 void setid3v2title(int fd, struct mp3entry *entry)
713 int minframesize;
714 int size;
715 long bufferpos = 0, totframelen, framelen;
716 char header[10];
717 char tmp[4];
718 unsigned char version;
719 char *buffer = entry->id3v2buf;
720 int bytesread = 0;
721 int buffersize = sizeof(entry->id3v2buf);
722 unsigned char global_flags;
723 int flags;
724 bool global_unsynch = false;
725 bool unsynch = false;
726 int i, j;
727 int rc;
728 #if CONFIG_CODEC == SWCODEC
729 bool itunes_gapless = false;
730 #endif
732 global_ff_found = false;
734 /* Bail out if the tag is shorter than 10 bytes */
735 if(entry->id3v2len < 10)
736 return;
738 /* Read the ID3 tag version from the header */
739 lseek(fd, 0, SEEK_SET);
740 if(10 != read(fd, header, 10))
741 return;
743 /* Get the total ID3 tag size */
744 size = entry->id3v2len - 10;
746 version = header[3];
747 switch ( version ) {
748 case 2:
749 version = ID3_VER_2_2;
750 minframesize = 8;
751 break;
753 case 3:
754 version = ID3_VER_2_3;
755 minframesize = 12;
756 break;
758 case 4:
759 version = ID3_VER_2_4;
760 minframesize = 12;
761 break;
763 default:
764 /* unsupported id3 version */
765 return;
767 entry->id3version = version;
768 entry->tracknum = entry->year = entry->discnum = 0;
769 entry->title = entry->artist = entry->album = NULL; /* FIXME incomplete */
771 global_flags = header[5];
773 /* Skip the extended header if it is present */
774 if(global_flags & 0x40) {
775 if(version == ID3_VER_2_3) {
776 if(10 != read(fd, header, 10))
777 return;
778 /* The 2.3 extended header size doesn't include the header size
779 field itself. Also, it is not unsynched. */
780 framelen =
781 bytes2int(header[0], header[1], header[2], header[3]) + 4;
783 /* Skip the rest of the header */
784 lseek(fd, framelen - 10, SEEK_CUR);
787 if(version >= ID3_VER_2_4) {
788 if(4 != read(fd, header, 4))
789 return;
791 /* The 2.4 extended header size does include the entire header,
792 so here we can just skip it. This header is unsynched. */
793 framelen = unsync(header[0], header[1],
794 header[2], header[3]);
796 lseek(fd, framelen - 4, SEEK_CUR);
800 /* Is unsynchronization applied? */
801 if(global_flags & 0x80) {
802 global_unsynch = true;
806 * We must have at least minframesize bytes left for the
807 * remaining frames to be interesting
809 while (size >= minframesize && bufferpos < buffersize - 1) {
810 flags = 0;
812 /* Read frame header and check length */
813 if(version >= ID3_VER_2_3) {
814 if(global_unsynch && version <= ID3_VER_2_3)
815 rc = read_unsynched(fd, header, 10);
816 else
817 rc = read(fd, header, 10);
818 if(rc != 10)
819 return;
820 /* Adjust for the 10 bytes we read */
821 size -= 10;
823 flags = bytes2int(0, 0, header[8], header[9]);
825 if (version >= ID3_VER_2_4) {
826 framelen = unsync(header[4], header[5],
827 header[6], header[7]);
828 } else {
829 /* version .3 files don't use synchsafe ints for
830 * size */
831 framelen = bytes2int(header[4], header[5],
832 header[6], header[7]);
834 } else {
835 if(6 != read(fd, header, 6))
836 return;
837 /* Adjust for the 6 bytes we read */
838 size -= 6;
840 framelen = bytes2int(0, header[3], header[4], header[5]);
843 logf("framelen = %ld, flags = 0x%04x", framelen, flags);
844 if(framelen == 0){
845 if (header[0] == 0 && header[1] == 0 && header[2] == 0)
846 return;
847 else
848 continue;
851 unsynch = false;
853 if(flags)
855 if (version >= ID3_VER_2_4) {
856 if(flags & 0x0040) { /* Grouping identity */
857 lseek(fd, 1, SEEK_CUR); /* Skip 1 byte */
858 framelen--;
860 } else {
861 if(flags & 0x0020) { /* Grouping identity */
862 lseek(fd, 1, SEEK_CUR); /* Skip 1 byte */
863 framelen--;
867 if(flags & 0x000c) /* Compression or encryption */
869 /* Skip it */
870 size -= framelen;
871 lseek(fd, framelen, SEEK_CUR);
872 continue;
875 if(flags & 0x0002) /* Unsynchronization */
876 unsynch = true;
878 if (version >= ID3_VER_2_4) {
879 if(flags & 0x0001) { /* Data length indicator */
880 if(4 != read(fd, tmp, 4))
881 return;
883 /* We don't need the data length */
884 framelen -= 4;
889 if (framelen == 0)
890 continue;
892 if (framelen < 0)
893 return;
895 /* Keep track of the remaining frame size */
896 totframelen = framelen;
898 /* If the frame is larger than the remaining buffer space we try
899 to read as much as would fit in the buffer */
900 if(framelen >= buffersize - bufferpos)
901 framelen = buffersize - bufferpos - 1;
903 /* Limit the maximum length of an id3 data item to ID3V2_MAX_ITEM_SIZE
904 bytes. This reduces the chance that the available buffer is filled
905 by single metadata items like large comments. */
906 if (ID3V2_MAX_ITEM_SIZE < framelen)
907 framelen = ID3V2_MAX_ITEM_SIZE;
909 logf("id3v2 frame: %.4s", header);
911 /* Check for certain frame headers
913 'size' is the amount of frame bytes remaining. We decrement it by
914 the amount of bytes we read. If we fail to read as many bytes as
915 we expect, we assume that we can't read from this file, and bail
916 out.
918 For each frame. we will iterate over the list of supported tags,
919 and read the tag into entry's buffer. All tags will be kept as
920 strings, for cases where a number won't do, e.g., YEAR: "circa
921 1765", "1790/1977" (composed/performed), "28 Feb 1969" TRACK:
922 "1/12", "1 of 12", GENRE: "Freeform genre name" Text is more
923 flexible, and as the main use of id3 data is to display it,
924 converting it to an int just means reconverting to display it, at a
925 runtime cost.
927 For tags that the current code does convert to ints, a post
928 processing function will be called via a pointer to function. */
930 for (i=0; i<TAGLIST_SIZE; i++) {
931 const struct tag_resolver* tr = &taglist[i];
932 char** ptag = tr->offset ? (char**) (((char*)entry) + tr->offset)
933 : NULL;
934 char* tag;
936 /* Only ID3_VER_2_2 uses frames with three-character names. */
937 if (((version == ID3_VER_2_2) && (tr->tag_length != 3))
938 || ((version > ID3_VER_2_2) && (tr->tag_length != 4))) {
939 continue;
942 if( !memcmp( header, tr->tag, tr->tag_length ) ) {
944 /* found a tag matching one in tagList, and not yet filled */
945 tag = buffer + bufferpos;
947 if(global_unsynch && version <= ID3_VER_2_3)
948 bytesread = read_unsynched(fd, tag, framelen);
949 else
950 bytesread = read(fd, tag, framelen);
952 if( bytesread != framelen )
953 return;
955 size -= bytesread;
957 if(unsynch || (global_unsynch && version >= ID3_VER_2_4))
958 bytesread = unsynchronize_frame(tag, bytesread);
960 /* the COMM frame has a 3 char field to hold an ISO-639-1
961 * language string and an optional short description;
962 * remove them so unicode_munge can work correctly
965 if((tr->tag_length == 4 && !memcmp( header, "COMM", 4)) ||
966 (tr->tag_length == 3 && !memcmp( header, "COM", 3))) {
967 int offset;
968 if(!strncmp(tag+4, "iTun", 4)) {
969 #if CONFIG_CODEC == SWCODEC
970 /* check for iTunes gapless information */
971 if(!strncmp(tag+4, "iTunSMPB", 8))
972 itunes_gapless = true;
973 else
974 #endif
975 /* ignore other with iTunes tags */
976 break;
979 offset = 3 + unicode_len(*tag, tag + 4);
980 if(bytesread > offset) {
981 bytesread -= offset;
982 memmove(tag + 1, tag + 1 + offset, bytesread - 1);
986 /* Attempt to parse Unicode string only if the tag contents
987 aren't binary */
988 if(!tr->binary) {
989 /* UTF-8 could potentially be 3 times larger */
990 /* so we need to create a new buffer */
991 char utf8buf[(3 * bytesread) + 1];
993 unicode_munge( tag, utf8buf, &bytesread );
995 if(bytesread >= buffersize - bufferpos)
996 bytesread = buffersize - bufferpos - 1;
998 for (j = 0; j < bytesread; j++)
999 tag[j] = utf8buf[j];
1001 /* remove trailing spaces */
1002 while ( bytesread > 0 && isspace(tag[bytesread-1]))
1003 bytesread--;
1006 tag[bytesread] = 0;
1007 bufferpos += bytesread + 1;
1009 #if CONFIG_CODEC == SWCODEC
1010 /* parse the tag if it contains iTunes gapless info */
1011 if (itunes_gapless)
1013 itunes_gapless = false;
1014 entry->lead_trim = get_itunes_int32(tag, 1);
1015 entry->tail_trim = get_itunes_int32(tag, 2);
1017 #endif
1019 /* Note that parser functions sometimes set *ptag to NULL, so
1020 * the "!*ptag" check here doesn't always have the desired
1021 * effect. Should the parser functions (parsegenre in
1022 * particular) be updated to handle the case of being called
1023 * multiple times, or should the "*ptag" check be removed?
1025 if (ptag && !*ptag)
1026 *ptag = tag;
1028 #ifdef HAVE_ALBUMART
1029 /* albumart */
1030 if ((!entry->embed_albumart) &&
1031 ((tr->tag_length == 4 && !memcmp( header, "APIC", 4)) ||
1032 (tr->tag_length == 3 && !memcmp( header, "PIC" , 3))))
1034 if (unsynch || (global_unsynch && version <= ID3_VER_2_3))
1035 entry->albumart.type = AA_TYPE_UNSYNC;
1036 else
1038 entry->albumart.pos = lseek(fd, 0, SEEK_CUR) - framelen;
1039 entry->albumart.size = totframelen;
1040 entry->albumart.type = AA_TYPE_UNKNOWN;
1043 #endif
1044 if( tr->ppFunc )
1045 bufferpos = tr->ppFunc(entry, tag, bufferpos);
1047 /* Seek to the next frame */
1048 if(framelen < totframelen)
1049 lseek(fd, totframelen - framelen, SEEK_CUR);
1050 break;
1054 if( i == TAGLIST_SIZE ) {
1055 /* no tag in tagList was found, or it was a repeat.
1056 skip it using the total size */
1058 if(global_unsynch && version <= ID3_VER_2_3) {
1059 size -= skip_unsynched(fd, totframelen);
1060 } else {
1061 size -= totframelen;
1062 if( lseek(fd, totframelen, SEEK_CUR) == -1 )
1063 return;
1070 * Calculates the size of the ID3v2 tag.
1072 * Arguments: file - the file to search for a tag.
1074 * Returns: the size of the tag or 0 if none was found
1076 int getid3v2len(int fd)
1078 char buf[6];
1079 int offset;
1081 /* Make sure file has a ID3 tag */
1082 if((-1 == lseek(fd, 0, SEEK_SET)) ||
1083 (read(fd, buf, 6) != 6) ||
1084 (strncmp(buf, "ID3", strlen("ID3")) != 0))
1085 offset = 0;
1087 /* Now check what the ID3v2 size field says */
1088 else
1089 if(read(fd, buf, 4) != 4)
1090 offset = 0;
1091 else
1092 offset = unsync(buf[0], buf[1], buf[2], buf[3]) + 10;
1094 logf("ID3V2 Length: 0x%x", offset);
1095 return offset;
1098 #ifdef DEBUG_STANDALONE
/* Format a duration given in milliseconds as "m:ss.t" (minutes, two-digit
 * seconds, tenths of a second).
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
char *secs2str(int ms)
{
    static char buffer[32];
    int secs = ms/1000;
    ms %= 1000;
    snprintf(buffer, sizeof(buffer), "%d:%02d.%d", secs/60, secs%60, ms/100);
    return buffer;
}
1109 int main(int argc, char **argv)
1111 int i;
1112 for(i=1; i<argc; i++) {
1113 struct mp3entry mp3;
1114 mp3.album = "Bogus";
1115 if(mp3info(&mp3, argv[i], false)) {
1116 printf("Failed to get %s\n", argv[i]);
1117 return 0;
1120 printf("****** File: %s\n"
1121 " Title: %s\n"
1122 " Artist: %s\n"
1123 " Album: %s\n"
1124 " Genre: %s (%d) \n"
1125 " Composer: %s\n"
1126 " Year: %s (%d)\n"
1127 " Track: %s (%d)\n"
1128 " Length: %s / %d s\n"
1129 " Bitrate: %d\n"
1130 " Frequency: %d\n",
1131 argv[i],
1132 mp3.title?mp3.title:"<blank>",
1133 mp3.artist?mp3.artist:"<blank>",
1134 mp3.album?mp3.album:"<blank>",
1135 mp3.genre_string?mp3.genre_string:"<blank>",
1136 mp3.genre,
1137 mp3.composer?mp3.composer:"<blank>",
1138 mp3.year_string?mp3.year_string:"<blank>",
1139 mp3.year,
1140 mp3.track_string?mp3.track_string:"<blank>",
1141 mp3.tracknum,
1142 secs2str(mp3.length),
1143 mp3.length/1000,
1144 mp3.bitrate,
1145 mp3.frequency);
1148 return 0;
1151 #endif