FS#11920: Do not overwrite already existing metadata and take into account string...
[maemo-rb.git] / apps / metadata / id3tags.c
blob9143f8ad25c6a1e42c0b499af6a07f281612e2cb
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2002 by Daniel Stenberg
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/*
 * Parts of this code have been stolen from the Ample project and were written
 * by David Härdeman. It has since been extended and enhanced pretty much by
 * all sorts of friendly Rockbox people.
 *
 */

/* tagResolver and associated code copyright 2003 Thomas Paul Diffenbach
 */
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <stdbool.h>
36 #include <stddef.h>
37 #include <ctype.h>
38 #include "string-extra.h"
39 #include "config.h"
40 #include "file.h"
41 #include "logf.h"
42 #include "system.h"
43 #include "replaygain.h"
44 #include "rbunicode.h"
46 #include "metadata.h"
47 #include "mp3data.h"
48 #if CONFIG_CODEC == SWCODEC
49 #include "metadata_common.h"
50 #endif
51 #include "metadata_parsers.h"
/*
 * Decode a "synchsafe" integer: four bytes that each contribute their low
 * seven bits, most significant byte first. The top bit of every byte is
 * ignored, so the result is at most 0x0FFFFFFF.
 */
static unsigned long unsync(unsigned long b0,
                            unsigned long b1,
                            unsigned long b2,
                            unsigned long b3)
{
    return ((b0 & 0x7F) << (3*7)) |
           ((b1 & 0x7F) << (2*7)) |
           ((b2 & 0x7F) << (1*7)) |
           ((b3 & 0x7F) << (0*7));
}
/*
 * Genre names indexed by numeric genre code. The spellings are reproduced
 * exactly as they have always been in this table.
 */
static const char* const genres[] = {
    "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", "Grunge",
    "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other", "Pop", "R&B",
    "Rap", "Reggae", "Rock", "Techno", "Industrial", "Alternative", "Ska",
    "Death Metal", "Pranks", "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
    "Vocal", "Jazz+Funk", "Fusion", "Trance", "Classical", "Instrumental",
    "Acid", "House", "Game", "Sound Clip", "Gospel", "Noise", "AlternRock",
    "Bass", "Soul", "Punk", "Space", "Meditative", "Instrumental Pop",
    "Instrumental Rock", "Ethnic", "Gothic", "Darkwave", "Techno-Industrial",
    "Electronic", "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy",
    "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", "Jungle",
    "Native American", "Cabaret", "New Wave", "Psychadelic", "Rave",
    "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz",
    "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",

    /* winamp extensions */
    "Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion", "Bebob",
    "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
    "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock",
    "Big Band", "Chorus", "Easy Listening", "Acoustic", "Humour", "Speech",
    "Chanson", "Opera", "Chamber Music", "Sonata", "Symphony", "Booty Bass",
    "Primus", "Porn Groove", "Satire", "Slow Jam", "Club", "Tango", "Samba",
    "Folklore", "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
    "Duet", "Punk Rock", "Drum Solo", "A capella", "Euro-House", "Dance Hall",
    "Goa", "Drum & Bass", "Club-House", "Hardcore", "Terror", "Indie",
    "BritPop", "Negerpunk", "Polsk Punk", "Beat", "Christian Gangsta Rap",
    "Heavy Metal", "Black Metal", "Crossover", "Contemporary Christian",
    "Christian Rock", "Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop",
    "Synthpop"
};
95 char* id3_get_num_genre(unsigned int genre_num)
97 if (genre_num < ARRAYLEN(genres))
98 return (char*)genres[genre_num];
99 return NULL;
/*
    HOW TO ADD ADDITIONAL ID3 VERSION 2 TAGS
    Code and comments by Thomas Paul Diffenbach

    To add another ID3v2 Tag, do the following:
    1.  Add a char* named for the tag to struct mp3entry in id3.h.
        (I (tpd) prefer to use char* rather than ints, even for what seems like
        numerical values, for cases where a number won't do, e.g.,
        YEAR: "circa 1765", "1790/1977" (composed/performed), "28 Feb 1969"
        TRACK: "1/12", "1 of 12", GENRE: "Freeform genre name"
        Text is more flexible, and as the main use of id3 data is to
        display it, converting it to an int just means reconverting to
        display it, at a runtime cost.)

    2.  If any special processing beyond copying the tag value from the Id3
        block to the struct mp3entry is required (such as converting to an
        int), write a function to perform this special processing.

        This function's prototype must match that of the ppFunc member of
        struct tag_resolver (historically typedef tagPostProcessFunc), that
        is it must be:
            int func( struct mp3entry*, char* tag, int bufferpos )
        The first argument is a pointer to the current mp3entry structure.
        The second argument is a pointer to the null terminated string value
        of the tag found. The third argument is the offset of the next free
        byte in the mp3entry's buffer. Your function should return the
        corrected offset; if you don't lengthen or shorten the tag string,
        you can return the third argument unchanged.

        Unless you have a good reason not to, make the function static.
        TO JUST COPY THE TAG NO SPECIAL PROCESSING FUNCTION IS NEEDED.

    3.  Add one or more entries to the taglist array, using the format:
            char* ID3 Tag symbolic name -- see the ID3 specification for these,
            sizeof() that name minus 1,
            offsetof( struct mp3entry, variable_name_in_struct_mp3entry ),
            pointer to your special processing function or NULL
                if you need no special processing
            flag indicating if this tag is binary or textual
        Many ID3 symbolic names come in more than one form. You can add both
        forms, each referencing the same variable in struct mp3entry.
        If both forms are present, the first one found is kept: the string
        pointer is only stored while the field is still NULL.
        Note that the offset can be zero, in which case no entry will be set
        in the mp3entry struct; the frame is still read into the buffer and
        the special processing function is called (several times, if there
        are several frames with the same name).

    4.  Alternately, use the TAG_LIST_ENTRY macro with
            ID3 tag symbolic name,
            variable in struct mp3entry,
            special processing function address

    5.  Add code to wps-display.c function get_tag to assign a printf-like
        format specifier for the tag */
/* Structure for ID3 Tag extraction information */
struct tag_resolver {
    const char* tag;    /* frame identifier, e.g. "TPE1" or "TP1" */
    int tag_length;     /* number of characters in the identifier (3 or 4) */
    size_t offset;      /* offsetof() the char* field in struct mp3entry,
                           or 0 when no field is to be set */
    /* optional post-processing hook; returns the new buffer position */
    int (*ppFunc)(struct mp3entry*, char* tag, int bufferpos);
    bool binary;        /* true: contents are not text, skip text decoding */
};
/* Unsynchronization state shared between successive read_unsynched() and
 * skip_unsynched() calls: true when the last byte seen was 0xFF.
 * setid3v2title() resets it before parsing a tag. */
static bool global_ff_found;
/*
 * Undo ID3v2 unsynchronization in place: every 0x00 that directly follows a
 * 0xFF is removed.
 *
 *   tag      - buffer to process; it is compacted in place
 *   len      - number of bytes in the buffer
 *   ff_found - in/out: whether the byte preceding this buffer was 0xFF.
 *              On return, whether the last byte processed was a 0xFF that
 *              was kept, so the state can be carried across buffers.
 *
 * Returns the number of bytes remaining after removal.
 */
static int unsynchronize(char* tag, int len, bool *ff_found)
{
    unsigned char *rp = (unsigned char *)tag;
    unsigned char *wp = rp;
    int i;

    for (i = 0; i < len; i++) {
        /* Read the next byte and write it back, but don't advance the
           write pointer until we know the byte is to be kept */
        unsigned char c = *rp++;
        *wp = c;

        if (*ff_found) {
            /* Keep the byte unless it completes an unsynch pattern */
            if (c != 0)
                wp++;
            /* A kept 0xFF can itself start the next pattern */
            *ff_found = (c == 0xff);
        } else {
            if (c == 0xff)
                *ff_found = true;
            wp++;
        }
    }

    return (int)(wp - (unsigned char *)tag);
}
/*
 * Undo unsynchronization for one self-contained frame: same as
 * unsynchronize(), but always starting with a cleared 0xFF state.
 * Returns the number of bytes left in tag.
 */
static int unsynchronize_frame(char* tag, int len)
{
    bool ff_found = false;

    return unsynchronize(tag, len, &ff_found);
}
202 static int read_unsynched(int fd, void *buf, int len)
204 int i;
205 int rc;
206 int remaining = len;
207 char *wp;
208 char *rp;
210 wp = buf;
212 while(remaining) {
213 rp = wp;
214 rc = read(fd, rp, remaining);
215 if(rc <= 0)
216 return rc;
218 i = unsynchronize(wp, remaining, &global_ff_found);
219 remaining -= i;
220 wp += i;
223 return len;
226 static int skip_unsynched(int fd, int len)
228 int rc;
229 int remaining = len;
230 int rlen;
231 char buf[32];
233 while(remaining) {
234 rlen = MIN(sizeof(buf), (unsigned int)remaining);
235 rc = read(fd, buf, rlen);
236 if(rc <= 0)
237 return rc;
239 remaining -= unsynchronize(buf, rlen, &global_ff_found);
242 return len;
245 /* parse numeric value from string */
246 static int parsetracknum( struct mp3entry* entry, char* tag, int bufferpos )
248 entry->tracknum = atoi( tag );
249 return bufferpos;
252 /* parse numeric value from string */
253 static int parsediscnum( struct mp3entry* entry, char* tag, int bufferpos )
255 entry->discnum = atoi( tag );
256 return bufferpos;
259 /* parse numeric value from string */
260 static int parseyearnum( struct mp3entry* entry, char* tag, int bufferpos )
262 entry->year = atoi( tag );
263 return bufferpos;
266 /* parse numeric genre from string, version 2.2 and 2.3 */
267 static int parsegenre( struct mp3entry* entry, char* tag, int bufferpos )
269 if(entry->id3version >= ID3_VER_2_4) {
270 /* In version 2.4 and up, there are no parentheses, and the genre frame
271 is a list of strings, either numbers or text. */
273 /* Is it a number? */
274 if(isdigit(tag[0])) {
275 entry->genre_string = id3_get_num_genre(atoi( tag ));
276 return tag - entry->id3v2buf;
277 } else {
278 entry->genre_string = tag;
279 return bufferpos;
281 } else {
282 if( tag[0] == '(' && tag[1] != '(' ) {
283 entry->genre_string = id3_get_num_genre(atoi( tag + 1 ));
284 return tag - entry->id3v2buf;
286 else {
287 entry->genre_string = tag;
288 return bufferpos;
#ifdef HAVE_ALBUMART
/*
 * Parse the header of an embedded picture frame (APIC / PIC).
 *
 * On entry entry->albumart.pos and .size describe the whole frame. When the
 * image format is recognised they are adjusted to cover only the image data
 * and entry->embed_albumart is set. Nothing is stored in id3v2buf, so
 * bufferpos is returned unchanged.
 */
static int parsealbumart( struct mp3entry* entry, char* tag, int bufferpos )
{
    char *start = tag;

    entry->embed_albumart = false;

    /* we currently don't support unsynchronizing albumart */
    if (entry->albumart.type == AA_TYPE_UNSYNC)
        return bufferpos;

    entry->albumart.type = AA_TYPE_UNKNOWN;

    /* skip text encoding */
    tag += 1;

    if (memcmp(tag, "image/", 6) == 0)
    {
        /* ID3 v2.3+: null terminated MIME type */
        tag += 6;
        if (strcmp(tag, "jpeg") == 0)
        {
            entry->albumart.type = AA_TYPE_JPG;
            tag += 5;
        }
        else if (strcmp(tag, "jpg") == 0)
        {
            /* not a registered MIME type, but some taggers write it */
            entry->albumart.type = AA_TYPE_JPG;
            tag += 4;
        }
        else if (strcmp(tag, "png") == 0)
        {
            entry->albumart.type = AA_TYPE_PNG;
            tag += 4;
        }
    }
    else
    {
        /* ID3 v2.2: fixed three character image format */
        if (memcmp(tag, "JPG", 3) == 0)
            entry->albumart.type = AA_TYPE_JPG;
        else if (memcmp(tag, "PNG", 3) == 0)
            entry->albumart.type = AA_TYPE_PNG;
        tag += 3;
    }

    if (entry->albumart.type != AA_TYPE_UNKNOWN)
    {
        /* skip picture type */
        tag += 1;
        /* skip description */
        tag = strchr(tag, '\0') + 1;
        /* fixup offset&size for image data */
        entry->albumart.pos  += tag - start;
        entry->albumart.size -= tag - start;
        entry->embed_albumart = true;
    }
    /* return bufferpos as we didn't store anything in id3v2buf */
    return bufferpos;
}
#endif
350 /* parse user defined text, looking for album artist and replaygain
351 * information.
353 static int parseuser( struct mp3entry* entry, char* tag, int bufferpos )
355 char* value = NULL;
356 int desc_len = strlen(tag);
357 int value_len = 0;
359 if ((tag - entry->id3v2buf + desc_len + 2) < bufferpos) {
360 /* At least part of the value was read, so we can safely try to
361 * parse it */
362 value = tag + desc_len + 1;
363 value_len = bufferpos - (tag - entry->id3v2buf);
365 if (!strcasecmp(tag, "ALBUM ARTIST")) {
366 strlcpy(tag, value, value_len);
367 entry->albumartist = tag;
368 #if CONFIG_CODEC == SWCODEC
369 } else {
370 value_len = parse_replaygain(tag, value, entry, tag, value_len);
371 #endif
375 return tag - entry->id3v2buf + value_len;
378 #if CONFIG_CODEC == SWCODEC
379 /* parse RVA2 binary data and convert to replaygain information. */
380 static int parserva2( struct mp3entry* entry, char* tag, int bufferpos )
382 int desc_len = strlen(tag);
383 int start_pos = tag - entry->id3v2buf;
384 int end_pos = start_pos + desc_len + 5;
385 int value_len = 0;
386 unsigned char* value = tag + desc_len + 1;
388 /* Only parse RVA2 replaygain tags if tag version == 2.4 and channel
389 * type is master volume.
391 if (entry->id3version == ID3_VER_2_4 && end_pos < bufferpos
392 && *value++ == 1) {
393 long gain = 0;
394 long peak = 0;
395 long peakbits;
396 long peakbytes;
397 bool album = false;
399 /* The RVA2 specification is unclear on some things (id string and
400 * peak volume), but this matches how Quod Libet use them.
403 gain = (int16_t) ((value[0] << 8) | value[1]);
404 value += 2;
405 peakbits = *value++;
406 peakbytes = (peakbits + 7) / 8;
408 /* Only use the topmost 24 bits for peak volume */
409 if (peakbytes > 3) {
410 peakbytes = 3;
413 /* Make sure the peak bits were read */
414 if (end_pos + peakbytes < bufferpos) {
415 long shift = ((8 - (peakbits & 7)) & 7) + (3 - peakbytes) * 8;
417 for ( ; peakbytes; peakbytes--) {
418 peak <<= 8;
419 peak += *value++;
422 peak <<= shift;
424 if (peakbits > 24) {
425 peak += *value >> (8 - shift);
429 if (strcasecmp(tag, "album") == 0) {
430 album = true;
431 } else if (strcasecmp(tag, "track") != 0) {
432 /* Only accept non-track values if we don't have any previous
433 * value.
435 if (entry->track_gain != 0) {
436 return start_pos;
440 value_len = parse_replaygain_int(album, gain, peak * 2, entry,
441 tag, sizeof(entry->id3v2buf) - start_pos);
444 return start_pos + value_len;
446 #endif
448 static int parsembtid( struct mp3entry* entry, char* tag, int bufferpos )
450 char* value = NULL;
451 int desc_len = strlen(tag);
452 /*DEBUGF("MBID len: %d\n", desc_len);*/
453 /* Musicbrainz track IDs are always 36 chars long */
454 const size_t mbtid_len = 36;
456 if ((tag - entry->id3v2buf + desc_len + 2) < bufferpos)
458 value = tag + desc_len + 1;
460 if (strcasecmp(tag, "http://musicbrainz.org") == 0)
462 if (mbtid_len == strlen(value))
464 entry->mb_track_id = value;
465 return bufferpos + mbtid_len + 1;
470 return bufferpos;
473 static const struct tag_resolver taglist[] = {
474 { "TPE1", 4, offsetof(struct mp3entry, artist), NULL, false },
475 { "TP1", 3, offsetof(struct mp3entry, artist), NULL, false },
476 { "TIT2", 4, offsetof(struct mp3entry, title), NULL, false },
477 { "TT2", 3, offsetof(struct mp3entry, title), NULL, false },
478 { "TALB", 4, offsetof(struct mp3entry, album), NULL, false },
479 { "TAL", 3, offsetof(struct mp3entry, album), NULL, false },
480 { "TRK", 3, offsetof(struct mp3entry, track_string), &parsetracknum, false },
481 { "TPOS", 4, offsetof(struct mp3entry, disc_string), &parsediscnum, false },
482 { "TPA", 3, offsetof(struct mp3entry, disc_string), &parsediscnum, false },
483 { "TRCK", 4, offsetof(struct mp3entry, track_string), &parsetracknum, false },
484 { "TDRC", 4, offsetof(struct mp3entry, year_string), &parseyearnum, false },
485 { "TYER", 4, offsetof(struct mp3entry, year_string), &parseyearnum, false },
486 { "TYE", 3, offsetof(struct mp3entry, year_string), &parseyearnum, false },
487 { "TCOM", 4, offsetof(struct mp3entry, composer), NULL, false },
488 { "TCM", 3, offsetof(struct mp3entry, composer), NULL, false },
489 { "TPE2", 4, offsetof(struct mp3entry, albumartist), NULL, false },
490 { "TP2", 3, offsetof(struct mp3entry, albumartist), NULL, false },
491 { "TIT1", 4, offsetof(struct mp3entry, grouping), NULL, false },
492 { "TT1", 3, offsetof(struct mp3entry, grouping), NULL, false },
493 { "COMM", 4, offsetof(struct mp3entry, comment), NULL, false },
494 { "COM", 3, offsetof(struct mp3entry, comment), NULL, false },
495 { "TCON", 4, offsetof(struct mp3entry, genre_string), &parsegenre, false },
496 { "TCO", 3, offsetof(struct mp3entry, genre_string), &parsegenre, false },
497 #ifdef HAVE_ALBUMART
498 { "APIC", 4, 0, &parsealbumart, true },
499 { "PIC", 3, 0, &parsealbumart, true },
500 #endif
501 { "TXXX", 4, 0, &parseuser, false },
502 #if CONFIG_CODEC == SWCODEC
503 { "RVA2", 4, 0, &parserva2, true },
504 #endif
505 { "UFID", 4, 0, &parsembtid, false },
508 #define TAGLIST_SIZE ((int)ARRAYLEN(taglist))
/* Get the length of an ID3 string in the given encoding. Returns the length
 * in bytes, including the terminator.
 *
 * Encodings 0x01 and 0x02 are scanned two bytes at a time up to and
 * including the first all-zero byte pair; every other encoding value is
 * treated as a single-byte, nil terminated string.
 */
static int unicode_len(char encoding, const void* string)
{
    int len = 0;

    if (encoding == 0x01 || encoding == 0x02) {
        const char *s = string;
        char first;
        char second;

        /* string might be unaligned, so using short* can crash on ARM and SH1 */
        do {
            first = *s++;
            second = *s++;
        } while ((first | second) != 0);

        len = s - (const char*) string;
    } else {
        len = strlen((const char*) string) + 1;
    }

    return len;
}
/* Convert the text of an ID3v2 frame to UTF-8.
 *
 *   string  - frame contents; the first byte normally selects the encoding
 *   utf8buf - receives the converted text
 *   len     - in: number of bytes in string; out: length of the result
 *
 * 0x00 is decoded through iso_decode(), 0x01/0x02 as UTF-16 (possibly
 * several nil separated strings, each with or without a byte order mark),
 * 0x03 is copied as UTF-8. Any other first byte is taken to be part of the
 * text and the whole buffer goes through iso_decode(). Always returns 0.
 */
static int unicode_munge(char* string, char* utf8buf, int *len) {
    long tmp;
    bool le = false;
    int i = 0;
    unsigned char *str = (unsigned char *)string;
    int templen = 0;
    unsigned char* utf8 = (unsigned char *)utf8buf;

    switch (str[0]) {
        case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
            str++;
            (*len)--;
            utf8 = iso_decode(str, utf8, -1, *len);
            *utf8 = 0;
            *len = (int)((char *)utf8 - utf8buf);
            break;

        case 0x01: /* Unicode with or without BOM */
        case 0x02:
            (*len)--;
            str++;

            /* Handle frames with more than one string
               (needed for TXXX frames).*/
            do {
                tmp = bytes2int(0, 0, str[0], str[1]);

                /* Now check if there is a BOM
                   (zero-width non-breaking space, 0xfeff)
                   and if it is in little or big endian format */
                if(tmp == 0xfffe) { /* Little endian? */
                    le = true;
                    str += 2;
                    (*len)-=2;
                } else if(tmp == 0xfeff) { /* Big endian? */
                    /* an explicit mark overrides whatever an earlier
                       string in the same frame used */
                    le = false;
                    str += 2;
                    (*len)-=2;
                } else
                    /* If there is no BOM (which is a specification violation),
                       let's try to guess it. If one of the bytes is 0x00, it is
                       probably the most significant one. */
                    if(str[1] == 0)
                        le = true;

                do {
                    if(le)
                        utf8 = utf16LEdecode(str, utf8, 1);
                    else
                        utf8 = utf16BEdecode(str, utf8, 1);

                    str+=2;
                    i += 2;
                } while((str[0] || str[1]) && (i < *len));

                *utf8++ = 0; /* Terminate the string */
                templen += (strlen(&utf8buf[templen]) + 1);
                str += 2;
                i+=2;
            } while(i < *len);
            *len = templen - 1;
            break;

        case 0x03: /* UTF-8 encoded string */
            for(i=0; i < *len; i++)
                utf8[i] = str[i+1];
            (*len)--;
            break;

        default: /* Plain old string */
            utf8 = iso_decode(str, utf8, -1, *len);
            *utf8 = 0;
            *len = (int)((char *)utf8 - utf8buf);
            break;
    }
    return 0;
}
614 * Sets the title of an MP3 entry based on its ID3v1 tag.
616 * Arguments: file - the MP3 file to scen for a ID3v1 tag
617 * entry - the entry to set the title in
619 * Returns: true if a title was found and created, else false
621 bool setid3v1title(int fd, struct mp3entry *entry)
623 unsigned char buffer[128];
624 static const char offsets[] = {3, 33, 63, 97, 93, 125, 127};
625 int i, j;
626 unsigned char* utf8;
628 if (-1 == lseek(fd, -128, SEEK_END))
629 return false;
631 if (read(fd, buffer, sizeof buffer) != sizeof buffer)
632 return false;
634 if (strncmp((char *)buffer, "TAG", 3))
635 return false;
637 entry->id3v1len = 128;
638 entry->id3version = ID3_VER_1_0;
640 for (i=0; i < (int)sizeof offsets; i++) {
641 unsigned char* ptr = (unsigned char *)buffer + offsets[i];
643 switch(i) {
644 case 0:
645 case 1:
646 case 2:
647 /* kill trailing space in strings */
648 for (j=29; j && (ptr[j]==0 || ptr[j]==' '); j--)
649 ptr[j] = 0;
650 /* convert string to utf8 */
651 utf8 = (unsigned char *)entry->id3v1buf[i];
652 utf8 = iso_decode(ptr, utf8, -1, 30);
653 /* make sure string is terminated */
654 *utf8 = 0;
655 break;
657 case 3:
658 /* kill trailing space in strings */
659 for (j=27; j && (ptr[j]==0 || ptr[j]==' '); j--)
660 ptr[j] = 0;
661 /* convert string to utf8 */
662 utf8 = (unsigned char *)entry->id3v1buf[3];
663 utf8 = iso_decode(ptr, utf8, -1, 28);
664 /* make sure string is terminated */
665 *utf8 = 0;
666 break;
668 case 4:
669 ptr[4] = 0;
670 entry->year = atoi((char *)ptr);
671 break;
673 case 5:
674 /* id3v1.1 uses last two bytes of comment field for track
675 number: first must be 0 and second is track num */
676 if (!ptr[0] && ptr[1]) {
677 entry->tracknum = ptr[1];
678 entry->id3version = ID3_VER_1_1;
680 break;
682 case 6:
683 /* genre */
684 entry->genre_string = id3_get_num_genre(ptr[0]);
685 break;
689 entry->title = entry->id3v1buf[0];
690 entry->artist = entry->id3v1buf[1];
691 entry->album = entry->id3v1buf[2];
692 entry->comment = entry->id3v1buf[3];
694 return true;
699 * Sets the title of an MP3 entry based on its ID3v2 tag.
701 * Arguments: file - the MP3 file to scan for a ID3v2 tag
702 * entry - the entry to set the title in
704 * Returns: true if a title was found and created, else false
706 void setid3v2title(int fd, struct mp3entry *entry)
708 int minframesize;
709 int size;
710 long bufferpos = 0, totframelen, framelen;
711 char header[10];
712 char tmp[4];
713 unsigned char version;
714 char *buffer = entry->id3v2buf;
715 int bytesread = 0;
716 int buffersize = sizeof(entry->id3v2buf);
717 unsigned char global_flags;
718 int flags;
719 bool global_unsynch = false;
720 bool unsynch = false;
721 int i, j;
722 int rc;
723 #if CONFIG_CODEC == SWCODEC
724 bool itunes_gapless = false;
725 #endif
727 global_ff_found = false;
729 /* Bail out if the tag is shorter than 10 bytes */
730 if(entry->id3v2len < 10)
731 return;
733 /* Read the ID3 tag version from the header */
734 lseek(fd, 0, SEEK_SET);
735 if(10 != read(fd, header, 10))
736 return;
738 /* Get the total ID3 tag size */
739 size = entry->id3v2len - 10;
741 version = header[3];
742 switch ( version ) {
743 case 2:
744 version = ID3_VER_2_2;
745 minframesize = 8;
746 break;
748 case 3:
749 version = ID3_VER_2_3;
750 minframesize = 12;
751 break;
753 case 4:
754 version = ID3_VER_2_4;
755 minframesize = 12;
756 break;
758 default:
759 /* unsupported id3 version */
760 return;
762 entry->id3version = version;
763 entry->tracknum = entry->year = entry->discnum = 0;
764 entry->title = entry->artist = entry->album = NULL; /* FIXME incomplete */
766 global_flags = header[5];
768 /* Skip the extended header if it is present */
769 if(global_flags & 0x40) {
770 if(version == ID3_VER_2_3) {
771 if(10 != read(fd, header, 10))
772 return;
773 /* The 2.3 extended header size doesn't include the header size
774 field itself. Also, it is not unsynched. */
775 framelen =
776 bytes2int(header[0], header[1], header[2], header[3]) + 4;
778 /* Skip the rest of the header */
779 lseek(fd, framelen - 10, SEEK_CUR);
782 if(version >= ID3_VER_2_4) {
783 if(4 != read(fd, header, 4))
784 return;
786 /* The 2.4 extended header size does include the entire header,
787 so here we can just skip it. This header is unsynched. */
788 framelen = unsync(header[0], header[1],
789 header[2], header[3]);
791 lseek(fd, framelen - 4, SEEK_CUR);
795 /* Is unsynchronization applied? */
796 if(global_flags & 0x80) {
797 global_unsynch = true;
801 * We must have at least minframesize bytes left for the
802 * remaining frames to be interesting
804 while (size >= minframesize && bufferpos < buffersize - 1) {
805 flags = 0;
807 /* Read frame header and check length */
808 if(version >= ID3_VER_2_3) {
809 if(global_unsynch && version <= ID3_VER_2_3)
810 rc = read_unsynched(fd, header, 10);
811 else
812 rc = read(fd, header, 10);
813 if(rc != 10)
814 return;
815 /* Adjust for the 10 bytes we read */
816 size -= 10;
818 flags = bytes2int(0, 0, header[8], header[9]);
820 if (version >= ID3_VER_2_4) {
821 framelen = unsync(header[4], header[5],
822 header[6], header[7]);
823 } else {
824 /* version .3 files don't use synchsafe ints for
825 * size */
826 framelen = bytes2int(header[4], header[5],
827 header[6], header[7]);
829 } else {
830 if(6 != read(fd, header, 6))
831 return;
832 /* Adjust for the 6 bytes we read */
833 size -= 6;
835 framelen = bytes2int(0, header[3], header[4], header[5]);
838 logf("framelen = %ld, flags = 0x%04x", framelen, flags);
839 if(framelen == 0){
840 if (header[0] == 0 && header[1] == 0 && header[2] == 0)
841 return;
842 else
843 continue;
846 unsynch = false;
848 if(flags)
850 if (version >= ID3_VER_2_4) {
851 if(flags & 0x0040) { /* Grouping identity */
852 lseek(fd, 1, SEEK_CUR); /* Skip 1 byte */
853 framelen--;
855 } else {
856 if(flags & 0x0020) { /* Grouping identity */
857 lseek(fd, 1, SEEK_CUR); /* Skip 1 byte */
858 framelen--;
862 if(flags & 0x000c) /* Compression or encryption */
864 /* Skip it */
865 size -= framelen;
866 lseek(fd, framelen, SEEK_CUR);
867 continue;
870 if(flags & 0x0002) /* Unsynchronization */
871 unsynch = true;
873 if (version >= ID3_VER_2_4) {
874 if(flags & 0x0001) { /* Data length indicator */
875 if(4 != read(fd, tmp, 4))
876 return;
878 /* We don't need the data length */
879 framelen -= 4;
884 if (framelen == 0)
885 continue;
887 if (framelen < 0)
888 return;
890 /* Keep track of the remaining frame size */
891 totframelen = framelen;
893 /* If the frame is larger than the remaining buffer space we try
894 to read as much as would fit in the buffer */
895 if(framelen >= buffersize - bufferpos)
896 framelen = buffersize - bufferpos - 1;
898 /* Limit the maximum length of an id3 data item to ID3V2_MAX_ITEM_SIZE
899 bytes. This reduces the chance that the available buffer is filled
900 by single metadata items like large comments. */
901 if (ID3V2_MAX_ITEM_SIZE < framelen)
902 framelen = ID3V2_MAX_ITEM_SIZE;
904 logf("id3v2 frame: %.4s", header);
906 /* Check for certain frame headers
908 'size' is the amount of frame bytes remaining. We decrement it by
909 the amount of bytes we read. If we fail to read as many bytes as
910 we expect, we assume that we can't read from this file, and bail
911 out.
913 For each frame. we will iterate over the list of supported tags,
914 and read the tag into entry's buffer. All tags will be kept as
915 strings, for cases where a number won't do, e.g., YEAR: "circa
916 1765", "1790/1977" (composed/performed), "28 Feb 1969" TRACK:
917 "1/12", "1 of 12", GENRE: "Freeform genre name" Text is more
918 flexible, and as the main use of id3 data is to display it,
919 converting it to an int just means reconverting to display it, at a
920 runtime cost.
922 For tags that the current code does convert to ints, a post
923 processing function will be called via a pointer to function. */
925 for (i=0; i<TAGLIST_SIZE; i++) {
926 const struct tag_resolver* tr = &taglist[i];
927 char** ptag = tr->offset ? (char**) (((char*)entry) + tr->offset)
928 : NULL;
929 char* tag;
931 /* Only ID3_VER_2_2 uses frames with three-character names. */
932 if (((version == ID3_VER_2_2) && (tr->tag_length != 3))
933 || ((version > ID3_VER_2_2) && (tr->tag_length != 4))) {
934 continue;
937 if( !memcmp( header, tr->tag, tr->tag_length ) ) {
939 /* found a tag matching one in tagList, and not yet filled */
940 tag = buffer + bufferpos;
942 if(global_unsynch && version <= ID3_VER_2_3)
943 bytesread = read_unsynched(fd, tag, framelen);
944 else
945 bytesread = read(fd, tag, framelen);
947 if( bytesread != framelen )
948 return;
950 size -= bytesread;
952 if(unsynch || (global_unsynch && version >= ID3_VER_2_4))
953 bytesread = unsynchronize_frame(tag, bytesread);
955 /* the COMM frame has a 3 char field to hold an ISO-639-1
956 * language string and an optional short description;
957 * remove them so unicode_munge can work correctly
960 if((tr->tag_length == 4 && !memcmp( header, "COMM", 4)) ||
961 (tr->tag_length == 3 && !memcmp( header, "COM", 3))) {
962 int offset;
963 if(!strncmp(tag+4, "iTun", 4)) {
964 #if CONFIG_CODEC == SWCODEC
965 /* check for iTunes gapless information */
966 if(!strncmp(tag+4, "iTunSMPB", 8))
967 itunes_gapless = true;
968 else
969 #endif
970 /* ignore other with iTunes tags */
971 break;
974 offset = 3 + unicode_len(*tag, tag + 4);
975 if(bytesread > offset) {
976 bytesread -= offset;
977 memmove(tag + 1, tag + 1 + offset, bytesread - 1);
981 /* Attempt to parse Unicode string only if the tag contents
982 aren't binary */
983 if(!tr->binary) {
984 /* UTF-8 could potentially be 3 times larger */
985 /* so we need to create a new buffer */
986 char utf8buf[(3 * bytesread) + 1];
988 unicode_munge( tag, utf8buf, &bytesread );
990 if(bytesread >= buffersize - bufferpos)
991 bytesread = buffersize - bufferpos - 1;
993 for (j = 0; j < bytesread; j++)
994 tag[j] = utf8buf[j];
996 /* remove trailing spaces */
997 while ( bytesread > 0 && isspace(tag[bytesread-1]))
998 bytesread--;
1001 tag[bytesread] = 0;
1002 bufferpos += bytesread + 1;
1004 #if CONFIG_CODEC == SWCODEC
1005 /* parse the tag if it contains iTunes gapless info */
1006 if (itunes_gapless)
1008 itunes_gapless = false;
1009 entry->lead_trim = get_itunes_int32(tag, 1);
1010 entry->tail_trim = get_itunes_int32(tag, 2);
1012 #endif
1014 /* Note that parser functions sometimes set *ptag to NULL, so
1015 * the "!*ptag" check here doesn't always have the desired
1016 * effect. Should the parser functions (parsegenre in
1017 * particular) be updated to handle the case of being called
1018 * multiple times, or should the "*ptag" check be removed?
1020 if (ptag && !*ptag)
1021 *ptag = tag;
1023 #ifdef HAVE_ALBUMART
1024 /* albumart */
1025 if ((!entry->embed_albumart) &&
1026 ((tr->tag_length == 4 && !memcmp( header, "APIC", 4)) ||
1027 (tr->tag_length == 3 && !memcmp( header, "PIC" , 3))))
1029 if (unsynch || (global_unsynch && version <= ID3_VER_2_3))
1030 entry->albumart.type = AA_TYPE_UNSYNC;
1031 else
1033 entry->albumart.pos = lseek(fd, 0, SEEK_CUR) - framelen;
1034 entry->albumart.size = totframelen;
1035 entry->albumart.type = AA_TYPE_UNKNOWN;
1038 #endif
1039 if( tr->ppFunc )
1040 bufferpos = tr->ppFunc(entry, tag, bufferpos);
1042 /* Trim. Take into account that multiple string contents will
1043 * only be displayed up to their first null termination. All
1044 * content after this null termination is obsolete and can be
1045 * overwritten. */
1046 bufferpos -= (bytesread - strlen(tag));
1048 /* Seek to the next frame */
1049 if(framelen < totframelen)
1050 lseek(fd, totframelen - framelen, SEEK_CUR);
1051 break;
1055 if( i == TAGLIST_SIZE ) {
1056 /* no tag in tagList was found, or it was a repeat.
1057 skip it using the total size */
1059 if(global_unsynch && version <= ID3_VER_2_3) {
1060 size -= skip_unsynched(fd, totframelen);
1061 } else {
1062 size -= totframelen;
1063 if( lseek(fd, totframelen, SEEK_CUR) == -1 )
1064 return;
1071 * Calculates the size of the ID3v2 tag.
1073 * Arguments: file - the file to search for a tag.
1075 * Returns: the size of the tag or 0 if none was found
1077 int getid3v2len(int fd)
1079 char buf[6];
1080 int offset;
1082 /* Make sure file has a ID3 tag */
1083 if((-1 == lseek(fd, 0, SEEK_SET)) ||
1084 (read(fd, buf, 6) != 6) ||
1085 (strncmp(buf, "ID3", strlen("ID3")) != 0))
1086 offset = 0;
1088 /* Now check what the ID3v2 size field says */
1089 else
1090 if(read(fd, buf, 4) != 4)
1091 offset = 0;
1092 else
1093 offset = unsync(buf[0], buf[1], buf[2], buf[3]) + 10;
1095 logf("ID3V2 Length: 0x%x", offset);
1096 return offset;
#ifdef DEBUG_STANDALONE

/* Format a duration given in milliseconds as "M:SS.t" (minutes, seconds,
 * tenths). The result lives in a static buffer that is overwritten by the
 * next call. */
char *secs2str(int ms)
{
    static char buffer[32];
    int secs = ms/1000;
    ms %= 1000;
    snprintf(buffer, sizeof(buffer), "%d:%02d.%d", secs/60, secs%60, ms/100);
    return buffer;
}

/* Stand-alone test driver: print the metadata of every file named on the
 * command line, stopping at the first file that cannot be parsed. */
int main(int argc, char **argv)
{
    int i;
    for(i=1; i<argc; i++) {
        struct mp3entry mp3;
        mp3.album = "Bogus";
        if(mp3info(&mp3, argv[i], false)) {
            printf("Failed to get %s\n", argv[i]);
            return 0;
        }

        printf("****** File: %s\n"
               "      Title: %s\n"
               "     Artist: %s\n"
               "      Album: %s\n"
               "      Genre: %s (%d) \n"
               "   Composer: %s\n"
               "       Year: %s (%d)\n"
               "      Track: %s (%d)\n"
               "     Length: %s / %d s\n"
               "    Bitrate: %d\n"
               "  Frequency: %d\n",
               argv[i],
               mp3.title?mp3.title:"<blank>",
               mp3.artist?mp3.artist:"<blank>",
               mp3.album?mp3.album:"<blank>",
               mp3.genre_string?mp3.genre_string:"<blank>",
               mp3.genre,
               mp3.composer?mp3.composer:"<blank>",
               mp3.year_string?mp3.year_string:"<blank>",
               mp3.year,
               mp3.track_string?mp3.track_string:"<blank>",
               mp3.tracknum,
               secs2str(mp3.length),
               mp3.length/1000,
               mp3.bitrate,
               mp3.frequency);
    }

    return 0;
}

#endif