help_view: one in all patch
[cmus.git] / id3.c
blob9d120b482dbc404b0031a836d773e5cffb7b2047
/*
 * Copyright 2005 Timo Hirvonen
 */
5 #include "id3.h"
6 #include "comment.h"
7 #include "xmalloc.h"
8 #include "utf8_encode.h"
9 #include "uchar.h"
10 #include "options.h"
11 #include "debug.h"
13 #include <unistd.h>
14 #include <inttypes.h>
15 #include <errno.h>
16 #include <stdio.h>
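/*
 * Tag signatures and where they are looked for
 * (offset >= 0 is from the start of the file, offset < 0 from the end):
 *
 *    0 "ID3"
 *  -10 "3DI"
 * -128 "TAG"
 * -138 "3DI"
 *
 * If v2 is at the beginning _and_ at the end then there must be a seek tag
 * at the beginning.
 */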
29 struct ID3 {
30 char v1[128];
31 char *v2[NUM_ID3_KEYS];
33 unsigned int has_v1 : 1;
34 unsigned int has_v2 : 1;
/* Decoded fields of the 10-byte ID3v2 header (or footer). */
struct v2_header {
	unsigned char ver_major;	/* byte 3 of the header */
	unsigned char ver_minor;	/* byte 4 of the header */
	unsigned char flags;		/* byte 5, V2_HEADER_* bits */
	uint32_t size;			/* sync-safe size decoded from bytes 6..9 */
};
/* The only part of the ID3v2 extended header this file looks at. */
struct v2_extended_header {
	uint32_t size;	/* decoded from the first 4 bytes of the tag body */
};
/* Decoded ID3v2 frame header, shared by the v2.2, v2.3 and v2.4 parsers. */
struct v2_frame_header {
	char id[4];	/* frame id; v2.2 ids are 3 chars followed by a 0 byte */
	uint32_t size;	/* payload size, excluding the frame header itself */
	uint16_t flags;	/* V2_FRAME_* bits; always 0 for v2.2 frames */
};
/* bits of struct v2_header.flags */
#define V2_HEADER_UNSYNC	0x80
#define V2_HEADER_EXTENDED	0x40
#define V2_HEADER_EXPERIMENTAL	0x20
#define V2_HEADER_FOOTER	0x10

/* bits of struct v2_frame_header.flags */
#define V2_FRAME_COMPRESSED	0x08 /* great idea!!1 */
#define V2_FRAME_ENCRYPTHED	0x04 /* wow, this is very neat! */
#define V2_FRAME_UNSYNC		0x02
#define V2_FRAME_LEN_INDICATOR	0x01
#define NR_GENRES 148
/* genres {{{ */
/*
 * Genre names indexed by the numeric genre id: the last byte of an ID3v1
 * tag, or a number found in an ID3v2 genre frame.  Both the pointers and
 * the strings are read-only.
 */
static const char * const genres[NR_GENRES] = {
	"Blues",
	"Classic Rock",
	"Country",
	"Dance",
	"Disco",
	"Funk",
	"Grunge",
	"Hip-Hop",
	"Jazz",
	"Metal",
	"New Age",
	"Oldies",
	"Other",
	"Pop",
	"R&B",
	"Rap",
	"Reggae",
	"Rock",
	"Techno",
	"Industrial",
	"Alternative",
	"Ska",
	"Death Metal",
	"Pranks",
	"Soundtrack",
	"Euro-Techno",
	"Ambient",
	"Trip-Hop",
	"Vocal",
	"Jazz+Funk",
	"Fusion",
	"Trance",
	"Classical",
	"Instrumental",
	"Acid",
	"House",
	"Game",
	"Sound Clip",
	"Gospel",
	"Noise",
	"Alt",
	"Bass",
	"Soul",
	"Punk",
	"Space",
	"Meditative",
	"Instrumental Pop",
	"Instrumental Rock",
	"Ethnic",
	"Gothic",
	"Darkwave",
	"Techno-Industrial",
	"Electronic",
	"Pop-Folk",
	"Eurodance",
	"Dream",
	"Southern Rock",
	"Comedy",
	"Cult",
	"Gangsta Rap",
	"Top 40",
	"Christian Rap",
	"Pop/Funk",
	"Jungle",
	"Native American",
	"Cabaret",
	"New Wave",
	"Psychedelic",
	"Rave",
	"Showtunes",
	"Trailer",
	"Lo-Fi",
	"Tribal",
	"Acid Punk",
	"Acid Jazz",
	"Polka",
	"Retro",
	"Musical",
	"Rock & Roll",
	"Hard Rock",
	"Folk",
	"Folk/Rock",
	"National Folk",
	"Swing",
	"Fast-Fusion",
	"Bebob",
	"Latin",
	"Revival",
	"Celtic",
	"Bluegrass",
	"Avantgarde",
	"Gothic Rock",
	"Progressive Rock",
	"Psychedelic Rock",
	"Symphonic Rock",
	"Slow Rock",
	"Big Band",
	"Chorus",
	"Easy Listening",
	"Acoustic",
	"Humour",
	"Speech",
	"Chanson",
	"Opera",
	"Chamber Music",
	"Sonata",
	"Symphony",
	"Booty Bass",
	"Primus",
	"Porn Groove",
	"Satire",
	"Slow Jam",
	"Club",
	"Tango",
	"Samba",
	"Folklore",
	"Ballad",
	"Power Ballad",
	"Rhythmic Soul",
	"Freestyle",
	"Duet",
	"Punk Rock",
	"Drum Solo",
	"A Cappella",
	"Euro-House",
	"Dance Hall",
	"Goa",
	"Drum & Bass",
	"Club-House",
	"Hardcore",
	"Terror",
	"Indie",
	"BritPop",
	"Negerpunk",
	"Polsk Punk",
	"Beat",
	"Christian Gangsta Rap",
	"Heavy Metal",
	"Black Metal",
	"Crossover",
	"Contemporary Christian",
	"Christian Rock",
	"Merengue",
	"Salsa",
	"Thrash Metal",
	"Anime",
	"JPop",
	"Synthpop"
};
/* }}} */
/*
 * Debug tracing for this file.  Change the condition to 0 to compile every
 * id3_debug() call down to an empty statement.
 */
#if 1
#define id3_debug(...) d_print(__VA_ARGS__)
#else
#define id3_debug(...) do { } while (0)
#endif
224 static int utf16_is_special(const uchar uch)
226 if (UTF16_IS_HSURROGATE(uch) || UTF16_IS_LSURROGATE(uch) || UTF16_IS_BOM(uch))
227 return -1;
228 return 0;
231 static char *utf16_to_utf8(const unsigned char *buf, int buf_size)
233 char *out;
234 int i, idx;
236 out = xnew(char, (buf_size / 2) * 4 + 1);
237 i = idx = 0;
238 while (buf_size - i >= 2) {
239 uchar u;
241 u = buf[i] + (buf[i + 1] << 8);
242 if (u_is_unicode(u)) {
243 if (utf16_is_special(u) == 0)
244 u_set_char(out, &idx, u);
245 } else {
246 free(out);
247 return NULL;
249 if (u == 0)
250 return out;
251 i += 2;
253 u_set_char(out, &idx, 0);
254 return out;
257 static char *utf16be_to_utf8(const unsigned char *buf, int buf_size)
259 char *out;
260 int i, idx;
262 out = xnew(char, (buf_size / 2) * 4 + 1);
263 i = 0;
264 idx = 0;
265 while (buf_size - i >= 2) {
266 uchar u;
268 u = buf[i + 1] + (buf[i] << 8);
269 if (u_is_unicode(u)) {
270 if (utf16_is_special(u) == 0)
271 u_set_char(out, &idx, u);
272 } else {
273 free(out);
274 return NULL;
276 if (u == 0)
277 return out;
278 i += 2;
280 u_set_char(out, &idx, 0);
281 return out;
/* Returns 1 if buf starts with the ID3v1 signature "TAG", 0 otherwise. */
static int is_v1(const char *buf)
{
	if (buf[0] != 'T')
		return 0;
	if (buf[1] != 'A')
		return 0;
	return buf[2] == 'G';
}
/*
 * Decode a 4-byte sync-safe integer (7 significant bits per byte, most
 * significant byte first).
 *
 * Returns 1 and stores the value in *up on success.  Returns 0 and leaves
 * *up untouched if any byte has its top bit set.
 */
static int u32_unsync(const unsigned char *buf, uint32_t *up)
{
	uint32_t value = 0;
	const unsigned char *p;

	for (p = buf; p < buf + 4; p++) {
		if (*p >= 0x80)
			return 0;
		value = (value << 7) | *p;
	}
	*up = value;
	return 1;
}
/* Store the big-endian 32-bit integer found at buf[0..3] in *up. */
static void get_u32(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 24)
	    | ((uint32_t)buf[1] << 16)
	    | ((uint32_t)buf[2] << 8)
	    |  (uint32_t)buf[3];
}
/* Store the big-endian 24-bit integer found at buf[0..2] in *up. */
static void get_u24(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 16)
	    | ((uint32_t)buf[1] << 8)
	    |  (uint32_t)buf[2];
}
331 static int v2_header_footer_parse(struct v2_header *header, const char *buf)
333 const unsigned char *b = (const unsigned char *)buf;
335 header->ver_major = b[3];
336 header->ver_minor = b[4];
337 header->flags = b[5];
338 if (header->ver_major == 0xff || header->ver_minor == 0xff)
339 return 0;
340 return u32_unsync(b + 6, &header->size);
/*
 * Parse a 10-byte ID3v2 header.  Returns 0 unless buf starts with "ID3",
 * otherwise the result of v2_header_footer_parse().
 */
static int v2_header_parse(struct v2_header *header, const char *buf)
{
	const int has_magic = buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3';

	return has_magic ? v2_header_footer_parse(header, buf) : 0;
}
/*
 * Parse a 10-byte ID3v2 footer.  Returns 0 unless buf starts with "3DI",
 * otherwise the result of v2_header_footer_parse().
 */
static int v2_footer_parse(struct v2_header *header, const char *buf)
{
	const int has_magic = buf[0] == '3' && buf[1] == 'D' && buf[2] == 'I';

	return has_magic ? v2_header_footer_parse(header, buf) : 0;
}
357 static int v2_extended_header_parse(struct v2_extended_header *header, const char *buf)
359 return u32_unsync((const unsigned char *)buf, &header->size);
/* Returns 1 if ch may appear in a frame id ([A-Z0-9]), 0 otherwise. */
static int is_frame_id_char(char ch)
{
	if (ch >= '0' && ch <= '9')
		return 1;
	return ch >= 'A' && ch <= 'Z';
}
367 /* XXXYYY
369 * X = [A-Z0-9]
370 * Y = byte
372 * XXX is frame
373 * YYY is frame size excluding this 6 byte header
375 static int v2_2_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
377 int i;
379 for (i = 0; i < 3; i++) {
380 if (!is_frame_id_char(buf[i]))
381 return 0;
382 header->id[i] = buf[i];
384 header->id[3] = 0;
385 get_u24((const unsigned char *)(buf + 3), &header->size);
386 header->flags = 0;
387 if (header->size == 0)
388 return 0;
389 id3_debug("%c%c%c %d\n", header->id[0], header->id[1], header->id[2], header->size);
390 return 1;
393 /* XXXXYYYYZZ
395 * X = [A-Z0-9]
396 * Y = byte
397 * Z = byte
399 * XXXX is frame
400 * YYYY is frame size excluding this 10 byte header
401 * ZZ is flags
403 static int v2_3_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
405 int i;
407 for (i = 0; i < 4; i++) {
408 if (!is_frame_id_char(buf[i]))
409 return 0;
410 header->id[i] = buf[i];
412 get_u32((const unsigned char *)(buf + 4), &header->size);
413 header->flags = (buf[8] << 8) | buf[9];
414 if (header->size == 0)
415 return 0;
416 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
417 header->id[3], header->size);
418 return 1;
421 /* same as 2.3 but header size is sync safe */
422 static int v2_4_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
424 int i;
426 for (i = 0; i < 4; i++) {
427 if (!is_frame_id_char(buf[i]))
428 return 0;
429 header->id[i] = buf[i];
431 if (!u32_unsync((const unsigned char *)(buf + 4), &header->size))
432 return 0;
433 header->flags = (buf[8] << 8) | buf[9];
434 if (header->size == 0)
435 return 0;
436 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
437 header->id[3], header->size);
438 return 1;
/*
 * Read exactly size bytes from fd into buf.
 *
 * Interrupted (EINTR) and would-block (EAGAIN) reads are retried.
 *
 * Returns 0 once all size bytes have been read, -1 on a read error
 * (errno set by read()) or if end of file is reached first (errno is not
 * set in that case).
 */
static int read_all(int fd, char *buf, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		ssize_t rc = read(fd, buf + pos, size - pos);

		if (rc == -1) {
			if (errno == EINTR || errno == EAGAIN)
				continue;
			return -1;
		}
		if (rc == 0) {
			/* EOF: without this check the loop would never end */
			return -1;
		}
		pos += (size_t)rc;
	}
	return 0;
}
458 static char *parse_genre(const char *str)
460 int parenthesis = 0;
461 long int idx;
462 char *end;
464 if (strncasecmp(str, "(RX", 3) == 0)
465 return xstrdup("Remix");
467 if (strncasecmp(str, "(CR", 3) == 0)
468 return xstrdup("Cover");
470 if (*str == '(') {
471 parenthesis = 1;
472 str++;
475 idx = strtol(str, &end, 10);
476 if (str != end) {
477 /* Number parsed but there may be some crap after the number.
478 * I don't care, ID3v2 by definition contains crap.
480 if (idx >= 0 && idx < NR_GENRES)
481 return xstrdup(genres[idx]);
484 if (parenthesis) {
485 const char *ptr = strchr(str, ')');
487 if (ptr && ptr[1]) {
488 /* genre name after random crap in parenthesis,
489 * return the genre name */
490 return xstrdup(ptr + 1);
492 str--;
495 /* random crap, just return it and wait for a bug report */
496 return xstrdup(str);
499 /* http://www.id3.org/id3v2.4.0-structure.txt */
500 static struct {
501 const char name[8];
502 enum id3_key key;
503 } frame_tab[] = {
504 /* 2.4.0 */
505 { "TDRC", ID3_DATE },
507 /* >= 2.3.0 */
508 { "TPE1", ID3_ARTIST },
509 { "TALB", ID3_ALBUM },
510 { "TIT2", ID3_TITLE },
511 { "TYER", ID3_DATE },
512 { "TCON", ID3_GENRE },
513 { "TPOS", ID3_DISC },
514 { "TRCK", ID3_TRACK },
516 /* obsolete frames (2.2.0) */
517 { "TP1", ID3_ARTIST },
518 { "TAL", ID3_ALBUM },
519 { "TT2", ID3_TITLE },
520 { "TYE", ID3_DATE },
521 { "TCO", ID3_GENRE },
522 { "TPA", ID3_DISC },
523 { "TRK", ID3_TRACK },
525 { "", -1 }
528 static void v2_add_frame(ID3 *id3, struct v2_frame_header *fh, const char *buf)
530 int i, encoding = *buf++, len = fh->size - 1;
532 if (encoding > 3)
533 return;
535 for (i = 0; frame_tab[i].key != -1; i++) {
536 enum id3_key key = frame_tab[i].key;
537 char *in, *out;
538 int rc;
540 if (strncmp(fh->id, frame_tab[i].name, 4))
541 continue;
543 switch (encoding) {
544 case 0x00: /* ISO-8859-1 */
545 in = xstrndup(buf, len);
546 rc = utf8_encode(in, id3_default_charset, &out);
547 free(in);
548 if (rc)
549 return;
550 break;
551 case 0x03: /* UTF-8 */
552 in = xstrndup(buf, len);
553 if (u_is_valid(in)) {
554 out = in;
555 } else {
556 rc = utf8_encode(in, id3_default_charset, &out);
557 free(in);
558 if (rc)
559 return;
561 break;
562 case 0x01: /* UTF-16 */
563 out = utf16_to_utf8((const unsigned char *)buf, len);
564 if (out == NULL)
565 return;
566 break;
567 case 0x02: /* UTF-16BE */
568 out = utf16be_to_utf8((const unsigned char *)buf, len);
569 if (out == NULL)
570 return;
571 break;
573 if (key == ID3_TRACK || key == ID3_DISC)
574 fix_track_or_disc(out);
575 if (key == ID3_GENRE) {
576 char *tmp;
578 id3_debug("genre before: '%s'\n", out);
579 tmp = parse_genre(out);
580 free(out);
581 out = tmp;
583 free(id3->v2[key]);
584 id3->v2[key] = out;
585 id3->has_v2 = 1;
586 id3_debug("%s '%s'\n", frame_tab[i].name, out);
587 break;
/*
 * Undo ID3v2 unsynchronisation in place: every 0xff 0x00 pair in buf is
 * replaced by a single 0xff.
 *
 * buf  - data to rewrite
 * lenp - in: number of bytes in buf; out: number of bytes left
 */
static void unsync(unsigned char *buf, int *lenp)
{
	const int len = *lenp;
	int src = 0, dst = 0;

	while (src < len - 1) {
		if (buf[src] != 0xff || buf[src + 1] != 0x00) {
			/* ordinary byte */
			buf[dst++] = buf[src++];
			continue;
		}

		/* 0xff 0x00 -> 0xff */
		buf[dst++] = 0xff;
		src += 2;

		if (src < len - 2 && buf[src] == 0x00) {
			/* 0xff 0x00 0x00 -> 0xff 0x00 */
			buf[dst++] = 0x00;
			src++;
		}
	}
	/* the loop looks at pairs, so one last byte may be left over */
	if (src < len)
		buf[dst++] = buf[src++];

	d_print("unsyncronization removed %d bytes\n", src - dst);
	*lenp = dst;
}
619 static int v2_read(ID3 *id3, int fd, const struct v2_header *header)
621 char *buf;
622 int rc, buf_size;
623 int frame_start, i;
624 int frame_header_size;
626 buf_size = header->size;
627 buf = xnew(char, buf_size);
628 rc = read_all(fd, buf, buf_size);
629 if (rc) {
630 free(buf);
631 return rc;
634 frame_start = 0;
635 if (header->flags & V2_HEADER_EXTENDED) {
636 struct v2_extended_header ext;
638 v2_extended_header_parse(&ext, buf);
639 if (ext.size > buf_size) {
640 id3_debug("extended header corrupted\n");
641 free(buf);
642 return -2;
644 frame_start = ext.size;
645 /* should check if update flag is set */
648 if (header->flags & V2_HEADER_UNSYNC) {
649 int len = buf_size - frame_start;
651 unsync((unsigned char *)(buf + frame_start), &len);
652 buf_size = len + frame_start;
655 frame_header_size = 10;
656 if (header->ver_major == 2)
657 frame_header_size = 6;
659 i = frame_start;
660 while (i < buf_size - frame_header_size) {
661 struct v2_frame_header fh;
662 int len;
664 if (header->ver_major == 2) {
665 if (!v2_2_0_frame_header_parse(&fh, buf + i))
666 break;
667 } else if (header->ver_major == 3) {
668 if (!v2_3_0_frame_header_parse(&fh, buf + i))
669 break;
670 } else {
671 /* assume v2.4 */
672 if (!v2_4_0_frame_header_parse(&fh, buf + i))
673 break;
676 i += frame_header_size;
677 if (fh.size > buf_size - i) {
678 id3_debug("frame too big\n");
679 break;
682 len = fh.size;
683 if (fh.flags & V2_FRAME_UNSYNC) {
684 int tmp = len;
686 unsync((unsigned char *)(buf + i), &tmp);
687 fh.size = tmp;
689 v2_add_frame(id3, &fh, buf + i);
690 i += len;
693 free(buf);
694 return 0;
697 int id3_tag_size(const char *buf, int buf_size)
699 struct v2_header header;
701 if (buf_size < 10)
702 return 0;
703 if (v2_header_parse(&header, buf)) {
704 if (header.flags & V2_HEADER_FOOTER) {
705 /* header + data + footer */
706 id3_debug("v2.%d.%d with footer\n", header.ver_major, header.ver_minor);
707 return 10 + header.size + 10;
709 /* header */
710 id3_debug("v2.%d.%d\n", header.ver_major, header.ver_minor);
711 return 10 + header.size;
713 if (buf_size >= 3 && is_v1(buf)) {
714 id3_debug("v1\n");
715 return 128;
717 return 0;
720 ID3 *id3_new(void)
722 return xnew0(ID3, 1);
725 void id3_free(ID3 *id3)
727 int i;
729 for (i = 0; i < NUM_ID3_KEYS; i++)
730 free(id3->v2[i]);
731 free(id3);
734 int id3_read_tags(ID3 *id3, int fd, unsigned int flags)
736 off_t off;
737 int rc;
739 if (flags & ID3_V2) {
740 struct v2_header header;
741 char buf[138];
743 rc = read_all(fd, buf, 10);
744 if (rc)
745 goto rc_error;
746 if (v2_header_parse(&header, buf)) {
747 rc = v2_read(id3, fd, &header);
748 if (rc)
749 goto rc_error;
750 /* get v1 if needed */
751 } else {
752 /* get v2 from end and optionally v1 */
754 off = lseek(fd, -138, SEEK_END);
755 if (off == -1)
756 goto error;
757 rc = read_all(fd, buf, 138);
758 if (rc)
759 goto rc_error;
761 if (is_v1(buf + 10)) {
762 if (flags & ID3_V1) {
763 memcpy(id3->v1, buf + 10, 128);
764 id3->has_v1 = 1;
766 if (v2_footer_parse(&header, buf)) {
767 /* footer at end of file - 128 */
768 off = lseek(fd, -(header.size + 138), SEEK_END);
769 if (off == -1)
770 goto error;
771 rc = v2_read(id3, fd, &header);
772 if (rc)
773 goto rc_error;
775 } else if (v2_footer_parse(&header, buf + 128)) {
776 /* footer at end of file */
777 off = lseek(fd, -(header.size + 10), SEEK_END);
778 if (off == -1)
779 goto error;
780 rc = v2_read(id3, fd, &header);
781 if (rc)
782 goto rc_error;
784 return 0;
787 if (flags & ID3_V1) {
788 off = lseek(fd, -128, SEEK_END);
789 if (off == -1)
790 goto error;
791 rc = read_all(fd, id3->v1, 128);
792 if (rc)
793 goto rc_error;
794 id3->has_v1 = is_v1(id3->v1);
796 return 0;
797 error:
798 rc = -1;
799 rc_error:
800 return rc;
803 static char *v1_get_str(const char *buf, int len)
805 char in[32];
806 char *out;
807 int i;
809 for (i = len - 1; i >= 0; i--) {
810 if (buf[i] != 0 && buf[i] != ' ')
811 break;
813 if (i == -1)
814 return NULL;
815 memcpy(in, buf, i + 1);
816 in[i + 1] = 0;
817 if (u_is_valid(in))
818 return xstrdup(in);
819 if (utf8_encode(in, id3_default_charset, &out))
820 return NULL;
821 return out;
824 char *id3_get_comment(ID3 *id3, enum id3_key key)
826 if (id3->has_v2) {
827 if (id3->v2[key])
828 return xstrdup(id3->v2[key]);
830 if (id3->has_v1) {
831 switch (key) {
832 case ID3_ARTIST:
833 return v1_get_str(id3->v1 + 33, 30);
834 case ID3_ALBUM:
835 return v1_get_str(id3->v1 + 63, 30);
836 case ID3_TITLE:
837 return v1_get_str(id3->v1 + 3, 30);
838 case ID3_DATE:
839 return v1_get_str(id3->v1 + 93, 4);
840 case ID3_GENRE:
842 unsigned char idx = id3->v1[127];
844 if (idx >= NR_GENRES)
845 return NULL;
846 return xstrdup(genres[idx]);
848 case ID3_DISC:
849 return NULL;
850 case ID3_TRACK:
852 char *t;
854 if (id3->v1[125] != 0)
855 return NULL;
856 t = xnew(char, 4);
857 snprintf(t, 4, "%d", ((unsigned char *)id3->v1)[126]);
858 return t;
862 return NULL;