Sort playlist file for library views in same order as view 2
[cmus.git] / id3.c
blob 355ba1f19237e7a1557c7b5b4ee7f1984a8eaf99
1 /*
2 * Copyright 2005 Timo Hirvonen
3 */
5 #include "id3.h"
6 #include "comment.h"
7 #include "xmalloc.h"
8 #include "utf8_encode.h"
9 #include "uchar.h"
10 #include "options.h"
11 #include "debug.h"
13 #include <unistd.h>
14 #include <inttypes.h>
15 #include <errno.h>
16 #include <stdio.h>
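/*
 * Positions at which tag markers are looked for (offsets >= 0 are from the
 * start of the file, negative offsets are from the end of the file):
 *
 *    0 "ID3"
 *  -10 "3DI"
 * -128 "TAG"
 * -138 "3DI"
 *
 * If a v2 tag is present at the beginning _and_ at the end, then there must
 * be a seek tag at the beginning.
 */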
29 struct ID3 {
30 char v1[128];
31 char *v2[NUM_ID3_KEYS];
33 unsigned int has_v1 : 1;
34 unsigned int has_v2 : 1;
/*
 * Decoded ID3v2 tag header (or footer, which shares the layout after the
 * 3-byte marker).
 */
struct v2_header {
	unsigned char ver_major;	/* byte 3 of the header */
	unsigned char ver_minor;	/* byte 4 of the header */
	unsigned char flags;		/* byte 5, see V2_HEADER_* */
	uint32_t size;			/* decoded from the 4 sync-safe bytes 6..9 */
};
/* Decoded ID3v2 extended header; only its size field is used. */
struct v2_extended_header {
	uint32_t size;
};
/*
 * Decoded ID3v2 frame header.
 *
 * id is not necessarily NUL-terminated: four-character frame ids fill the
 * whole array, three-character (v2.2) ids are followed by a 0 byte.
 */
struct v2_frame_header {
	char id[4];
	uint32_t size;		/* payload size, excluding the frame header */
	uint16_t flags;		/* see V2_FRAME_*; 0 for v2.2 frames */
};
/* bits of struct v2_header.flags */
#define V2_HEADER_UNSYNC	0x80
#define V2_HEADER_EXTENDED	0x40
#define V2_HEADER_EXPERIMENTAL	0x20
#define V2_HEADER_FOOTER	0x10

/* bits of struct v2_frame_header.flags (low byte) */
#define V2_FRAME_COMPRESSED	0x08
#define V2_FRAME_ENCRYPTHED	0x04
#define V2_FRAME_UNSYNC		0x02
#define V2_FRAME_LEN_INDICATOR	0x01
#define NR_GENRES 148
/* genres {{{ */
/*
 * Genre names indexed by numeric genre code.  Used both for the ID3v1 genre
 * byte and for numeric references found in ID3v2 genre frames.
 */
static const char *genres[NR_GENRES] = {
	/*   0 */ "Blues", "Classic Rock", "Country", "Dance", "Disco",
	/*   5 */ "Funk", "Grunge", "Hip-Hop", "Jazz", "Metal",
	/*  10 */ "New Age", "Oldies", "Other", "Pop", "R&B",
	/*  15 */ "Rap", "Reggae", "Rock", "Techno", "Industrial",
	/*  20 */ "Alternative", "Ska", "Death Metal", "Pranks", "Soundtrack",
	/*  25 */ "Euro-Techno", "Ambient", "Trip-Hop", "Vocal", "Jazz+Funk",
	/*  30 */ "Fusion", "Trance", "Classical", "Instrumental", "Acid",
	/*  35 */ "House", "Game", "Sound Clip", "Gospel", "Noise",
	/*  40 */ "Alt", "Bass", "Soul", "Punk", "Space",
	/*  45 */ "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic",
	/*  50 */ "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk", "Eurodance",
	/*  55 */ "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta Rap",
	/*  60 */ "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native American",
	/*  65 */ "Cabaret", "New Wave", "Psychedelic", "Rave", "Showtunes",
	/*  70 */ "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz",
	/*  75 */ "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",
	/*  80 */ "Folk", "Folk/Rock", "National Folk", "Swing", "Fast-Fusion",
	/*  85 */ "Bebob", "Latin", "Revival", "Celtic", "Bluegrass",
	/*  90 */ "Avantgarde", "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock",
	/*  95 */ "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic",
	/* 100 */ "Humour", "Speech", "Chanson", "Opera", "Chamber Music",
	/* 105 */ "Sonata", "Symphony", "Booty Bass", "Primus", "Porn Groove",
	/* 110 */ "Satire", "Slow Jam", "Club", "Tango", "Samba",
	/* 115 */ "Folklore", "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
	/* 120 */ "Duet", "Punk Rock", "Drum Solo", "A Cappella", "Euro-House",
	/* 125 */ "Dance Hall", "Goa", "Drum & Bass", "Club-House", "Hardcore",
	/* 130 */ "Terror", "Indie", "BritPop", "Negerpunk", "Polsk Punk",
	/* 135 */ "Beat", "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",
	/* 140 */ "Contemporary Christian", "Christian Rock", "Merengue", "Salsa", "Thrash Metal",
	/* 145 */ "Anime", "JPop", "Synthpop"
};
/* }}} */
/*
 * Parser tracing.  Change the condition to 0 to compile the trace calls out;
 * the empty variant still swallows its arguments as a single statement.
 */
#if 1
#define id3_debug(...) d_print(__VA_ARGS__)
#else
#define id3_debug(...) do { } while (0)
#endif
224 static int utf16_is_special(const uchar uch)
226 if (UTF16_IS_HSURROGATE(uch) || UTF16_IS_LSURROGATE(uch) || UTF16_IS_BOM(uch))
227 return -1;
228 return 0;
231 static char *utf16_to_utf8(const unsigned char *buf, int buf_size)
233 char *out;
234 int i, idx;
236 out = xnew(char, (buf_size / 2) * 4 + 1);
237 i = idx = 0;
238 while (buf_size - i >= 2) {
239 uchar u;
241 u = buf[i] + (buf[i + 1] << 8);
242 if (u_is_unicode(u)) {
243 if (utf16_is_special(u) == 0)
244 u_set_char(out, &idx, u);
245 } else {
246 free(out);
247 return NULL;
249 if (u == 0)
250 return out;
251 i += 2;
253 u_set_char(out, &idx, 0);
254 return out;
257 static char *utf16be_to_utf8(const unsigned char *buf, int buf_size)
259 char *out;
260 int i, idx;
262 out = xnew(char, (buf_size / 2) * 4 + 1);
263 i = 0;
264 idx = 0;
265 while (buf_size - i >= 2) {
266 uchar u;
268 u = buf[i + 1] + (buf[i] << 8);
269 if (u_is_unicode(u)) {
270 if (utf16_is_special(u) == 0)
271 u_set_char(out, &idx, u);
272 } else {
273 free(out);
274 return NULL;
276 if (u == 0)
277 return out;
278 i += 2;
280 u_set_char(out, &idx, 0);
281 return out;
/* Returns 1 if buf starts with the ID3v1 marker "TAG", 0 otherwise. */
static int is_v1(const char *buf)
{
	if (buf[0] != 'T')
		return 0;
	if (buf[1] != 'A')
		return 0;
	return buf[2] == 'G';
}
/*
 * Decode a 4-byte sync-safe integer (7 significant bits per byte, most
 * significant byte first).
 *
 * Returns 1 and stores the value in *up on success.  Returns 0 and leaves
 * *up untouched if any byte has its top bit set.
 */
static int u32_unsync(const unsigned char *buf, uint32_t *up)
{
	const unsigned char *p;
	uint32_t value = 0;

	for (p = buf; p != buf + 4; p++) {
		if (*p & 0x80)
			return 0;
		value = (value << 7) | *p;
	}
	*up = value;
	return 1;
}
/* Store the big-endian 32-bit value found in buf[0..3] in *up. */
static void get_u32(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 24) |
	      ((uint32_t)buf[1] << 16) |
	      ((uint32_t)buf[2] << 8) |
	       (uint32_t)buf[3];
}
/* Store the big-endian 24-bit value found in buf[0..2] in *up. */
static void get_u24(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 16) |
	      ((uint32_t)buf[1] << 8) |
	       (uint32_t)buf[2];
}
331 static int v2_header_footer_parse(struct v2_header *header, const char *buf)
333 const unsigned char *b = (const unsigned char *)buf;
335 header->ver_major = b[3];
336 header->ver_minor = b[4];
337 header->flags = b[5];
338 if (header->ver_major == 0xff || header->ver_minor == 0xff)
339 return 0;
340 return u32_unsync(b + 6, &header->size);
/*
 * Parse a 10-byte ID3v2 header.  Returns 1 if buf starts with "ID3" and the
 * rest of the header decodes, 0 otherwise.
 */
static int v2_header_parse(struct v2_header *header, const char *buf)
{
	const int has_marker = buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3';

	return has_marker ? v2_header_footer_parse(header, buf) : 0;
}
/*
 * Parse a 10-byte ID3v2 footer.  Returns 1 if buf starts with "3DI" and the
 * rest of the footer decodes, 0 otherwise.
 */
static int v2_footer_parse(struct v2_header *header, const char *buf)
{
	const int has_marker = buf[0] == '3' && buf[1] == 'D' && buf[2] == 'I';

	return has_marker ? v2_header_footer_parse(header, buf) : 0;
}
357 static int v2_extended_header_parse(struct v2_extended_header *header, const char *buf)
359 return u32_unsync((const unsigned char *)buf, &header->size);
/* Returns 1 if ch may appear in a frame id ([A-Z0-9]), 0 otherwise. */
static int is_frame_id_char(char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return 1;
	return ch >= '0' && ch <= '9';
}
367 /* XXXYYY
369 * X = [A-Z0-9]
370 * Y = byte
372 * XXX is frame
373 * YYY is frame size excluding this 6 byte header
375 static int v2_2_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
377 int i;
379 for (i = 0; i < 3; i++) {
380 if (!is_frame_id_char(buf[i]))
381 return 0;
382 header->id[i] = buf[i];
384 header->id[3] = 0;
385 get_u24((const unsigned char *)(buf + 3), &header->size);
386 header->flags = 0;
387 if (header->size == 0)
388 return 0;
389 id3_debug("%c%c%c %d\n", header->id[0], header->id[1], header->id[2], header->size);
390 return 1;
393 /* XXXXYYYYZZ
395 * X = [A-Z0-9]
396 * Y = byte
397 * Z = byte
399 * XXXX is frame
400 * YYYY is frame size excluding this 10 byte header
401 * ZZ is flags
403 static int v2_3_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
405 int i;
407 for (i = 0; i < 4; i++) {
408 if (!is_frame_id_char(buf[i]))
409 return 0;
410 header->id[i] = buf[i];
412 get_u32((const unsigned char *)(buf + 4), &header->size);
413 header->flags = (buf[8] << 8) | buf[9];
414 if (header->size == 0)
415 return 0;
416 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
417 header->id[3], header->size);
418 return 1;
421 /* same as 2.3 but header size is sync safe */
422 static int v2_4_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
424 int i;
426 for (i = 0; i < 4; i++) {
427 if (!is_frame_id_char(buf[i]))
428 return 0;
429 header->id[i] = buf[i];
431 if (!u32_unsync((const unsigned char *)(buf + 4), &header->size))
432 return 0;
433 header->flags = (buf[8] << 8) | buf[9];
434 if (header->size == 0)
435 return 0;
436 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
437 header->id[3], header->size);
438 return 1;
/*
 * Read exactly @size bytes from @fd into @buf, retrying short reads and
 * reads interrupted by EINTR/EAGAIN.
 *
 * Returns  0 when all @size bytes were read,
 *         -1 when read() failed (errno is set by read()),
 *         -2 when end of file was reached before @size bytes were available.
 *
 * The -2 case used to spin forever: read() returning 0 never advanced the
 * position.  -2 is the value this file already uses for a damaged tag, and
 * it avoids reporting a stale errno for what is a truncated file.
 */
static int read_all(int fd, char *buf, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		ssize_t rc = read(fd, buf + pos, size - pos);

		if (rc == -1) {
			if (errno == EINTR || errno == EAGAIN)
				continue;
			return -1;
		}
		if (rc == 0)
			return -2;
		pos += (size_t)rc;
	}
	return 0;
}
458 static char *parse_genre(const char *str)
460 int parenthesis = 0;
461 long int idx;
462 char *end;
464 if (strncasecmp(str, "(RX", 3) == 0)
465 return xstrdup("Remix");
467 if (strncasecmp(str, "(CR", 3) == 0)
468 return xstrdup("Cover");
470 if (*str == '(') {
471 parenthesis = 1;
472 str++;
475 idx = strtol(str, &end, 10);
476 if (str != end) {
477 /* Number parsed but there may be some crap after the number.
478 * I don't care, ID3v2 by definition contains crap.
480 if (idx >= 0 && idx < NR_GENRES)
481 return xstrdup(genres[idx]);
484 if (parenthesis) {
485 const char *ptr = strchr(str, ')');
487 if (ptr && ptr[1]) {
488 /* genre name after random crap in parenthesis,
489 * return the genre name */
490 return xstrdup(ptr + 1);
492 str--;
495 /* random crap, just return it and wait for a bug report */
496 return xstrdup(str);
499 /* http://www.id3.org/id3v2.4.0-structure.txt */
500 static struct {
501 const char name[8];
502 enum id3_key key;
503 } frame_tab[] = {
504 /* 2.4.0 */
505 { "TDRC", ID3_DATE }, // recording date
506 { "TDRL", ID3_DATE }, // release date
507 { "TDOR", ID3_DATE }, // original release date
509 /* >= 2.3.0 */
510 { "TPE1", ID3_ARTIST },
511 { "TALB", ID3_ALBUM },
512 { "TIT2", ID3_TITLE },
513 { "TYER", ID3_DATE },
514 { "TCON", ID3_GENRE },
515 { "TPOS", ID3_DISC },
516 { "TRCK", ID3_TRACK },
517 { "TPE2", ID3_ALBUMARTIST },
519 /* obsolete frames (2.2.0) */
520 { "TP1", ID3_ARTIST },
521 { "TAL", ID3_ALBUM },
522 { "TT2", ID3_TITLE },
523 { "TYE", ID3_DATE },
524 { "TCO", ID3_GENRE },
525 { "TPA", ID3_DISC },
526 { "TRK", ID3_TRACK },
528 { "", -1 }
/*
 * Reduce a date string to its year, in place.
 *
 * The first run of exactly four consecutive digits is taken to be the year
 * and moved to the start of buf.  If there is no such run, buf becomes the
 * empty string.
 */
static void fix_date(char *buf)
{
	const char *p = buf;
	int digits = 0;

	for (;;) {
		const int ch = *p++;

		if (ch >= '0' && ch <= '9') {
			digits++;
			continue;
		}
		if (digits == 4) {
			/* p is one past the delimiter, so the run starts at p - 5 */
			memmove(buf, p - 5, 4);
			buf[4] = 0;
			return;
		}
		if (ch == 0)
			break;
		digits = 0;
	}
	*buf = 0;
}
553 static void v2_add_frame(ID3 *id3, struct v2_frame_header *fh, const char *buf)
555 int i, encoding = *buf++, len = fh->size - 1;
557 if (encoding > 3)
558 return;
560 for (i = 0; frame_tab[i].key != -1; i++) {
561 enum id3_key key = frame_tab[i].key;
562 char *in, *out;
563 int rc;
565 if (strncmp(fh->id, frame_tab[i].name, 4))
566 continue;
568 switch (encoding) {
569 case 0x00: /* ISO-8859-1 */
570 in = xstrndup(buf, len);
571 rc = utf8_encode(in, id3_default_charset, &out);
572 free(in);
573 if (rc)
574 return;
575 break;
576 case 0x03: /* UTF-8 */
577 in = xstrndup(buf, len);
578 if (u_is_valid(in)) {
579 out = in;
580 } else {
581 rc = utf8_encode(in, id3_default_charset, &out);
582 free(in);
583 if (rc)
584 return;
586 break;
587 case 0x01: /* UTF-16 */
588 out = utf16_to_utf8((const unsigned char *)buf, len);
589 if (out == NULL)
590 return;
591 break;
592 case 0x02: /* UTF-16BE */
593 out = utf16be_to_utf8((const unsigned char *)buf, len);
594 if (out == NULL)
595 return;
596 break;
599 if (key == ID3_TRACK || key == ID3_DISC)
600 fix_track_or_disc(out);
601 if (key == ID3_GENRE) {
602 char *tmp;
604 id3_debug("genre before: '%s'\n", out);
605 tmp = parse_genre(out);
606 free(out);
607 out = tmp;
609 if (key == ID3_DATE) {
610 id3_debug("date before: '%s'\n", out);
611 fix_date(out);
612 if (!*out) {
613 id3_debug("date parsing failed\n");
614 free(out);
615 return;
619 free(id3->v2[key]);
620 id3->v2[key] = out;
621 id3->has_v2 = 1;
622 id3_debug("%s '%s'\n", frame_tab[i].name, out);
623 break;
/*
 * Undo ID3v2 unsynchronisation in place: every 0xff 0x00 pair in buf becomes
 * a single 0xff.
 *
 * @buf   data to rewrite
 * @lenp  in: number of bytes in buf; out: number of bytes remaining
 */
static void unsync(unsigned char *buf, int *lenp)
{
	const int len = *lenp;
	int src = 0, dst = 0;

	while (src < len - 1) {
		if (buf[src] != 0xff || buf[src + 1] != 0x00) {
			/* ordinary byte, copy as is */
			buf[dst++] = buf[src++];
			continue;
		}

		/* 0xff 0x00 -> 0xff */
		buf[dst++] = 0xff;
		src += 2;

		if (src < len - 2 && buf[src] == 0x00) {
			/* 0xff 0x00 0x00 -> 0xff 0x00 */
			buf[dst++] = 0x00;
			src++;
		}
	}
	/* last byte has no successor to pair with */
	if (src < len)
		buf[dst++] = buf[src++];

	d_print("unsyncronization removed %d bytes\n", src - dst);
	*lenp = dst;
}
655 static int v2_read(ID3 *id3, int fd, const struct v2_header *header)
657 char *buf;
658 int rc, buf_size;
659 int frame_start, i;
660 int frame_header_size;
662 buf_size = header->size;
663 buf = xnew(char, buf_size);
664 rc = read_all(fd, buf, buf_size);
665 if (rc) {
666 free(buf);
667 return rc;
670 frame_start = 0;
671 if (header->flags & V2_HEADER_EXTENDED) {
672 struct v2_extended_header ext;
674 v2_extended_header_parse(&ext, buf);
675 if (ext.size > buf_size) {
676 id3_debug("extended header corrupted\n");
677 free(buf);
678 return -2;
680 frame_start = ext.size;
681 /* should check if update flag is set */
684 if (header->flags & V2_HEADER_UNSYNC) {
685 int len = buf_size - frame_start;
687 unsync((unsigned char *)(buf + frame_start), &len);
688 buf_size = len + frame_start;
691 frame_header_size = 10;
692 if (header->ver_major == 2)
693 frame_header_size = 6;
695 i = frame_start;
696 while (i < buf_size - frame_header_size) {
697 struct v2_frame_header fh;
698 int len;
700 if (header->ver_major == 2) {
701 if (!v2_2_0_frame_header_parse(&fh, buf + i))
702 break;
703 } else if (header->ver_major == 3) {
704 if (!v2_3_0_frame_header_parse(&fh, buf + i))
705 break;
706 } else {
707 /* assume v2.4 */
708 if (!v2_4_0_frame_header_parse(&fh, buf + i))
709 break;
712 i += frame_header_size;
713 if (fh.size > buf_size - i) {
714 id3_debug("frame too big\n");
715 break;
718 len = fh.size;
719 if (fh.flags & V2_FRAME_UNSYNC) {
720 int tmp = len;
722 unsync((unsigned char *)(buf + i), &tmp);
723 fh.size = tmp;
725 v2_add_frame(id3, &fh, buf + i);
726 i += len;
729 free(buf);
730 return 0;
733 int id3_tag_size(const char *buf, int buf_size)
735 struct v2_header header;
737 if (buf_size < 10)
738 return 0;
739 if (v2_header_parse(&header, buf)) {
740 if (header.flags & V2_HEADER_FOOTER) {
741 /* header + data + footer */
742 id3_debug("v2.%d.%d with footer\n", header.ver_major, header.ver_minor);
743 return 10 + header.size + 10;
745 /* header */
746 id3_debug("v2.%d.%d\n", header.ver_major, header.ver_minor);
747 return 10 + header.size;
749 if (buf_size >= 3 && is_v1(buf)) {
750 id3_debug("v1\n");
751 return 128;
753 return 0;
756 ID3 *id3_new(void)
758 return xnew0(ID3, 1);
761 void id3_free(ID3 *id3)
763 int i;
765 for (i = 0; i < NUM_ID3_KEYS; i++)
766 free(id3->v2[i]);
767 free(id3);
770 int id3_read_tags(ID3 *id3, int fd, unsigned int flags)
772 off_t off;
773 int rc;
775 if (flags & ID3_V2) {
776 struct v2_header header;
777 char buf[138];
779 rc = read_all(fd, buf, 10);
780 if (rc)
781 goto rc_error;
782 if (v2_header_parse(&header, buf)) {
783 rc = v2_read(id3, fd, &header);
784 if (rc)
785 goto rc_error;
786 /* get v1 if needed */
787 } else {
788 /* get v2 from end and optionally v1 */
790 off = lseek(fd, -138, SEEK_END);
791 if (off == -1)
792 goto error;
793 rc = read_all(fd, buf, 138);
794 if (rc)
795 goto rc_error;
797 if (is_v1(buf + 10)) {
798 if (flags & ID3_V1) {
799 memcpy(id3->v1, buf + 10, 128);
800 id3->has_v1 = 1;
802 if (v2_footer_parse(&header, buf)) {
803 /* footer at end of file - 128 */
804 off = lseek(fd, -(header.size + 138), SEEK_END);
805 if (off == -1)
806 goto error;
807 rc = v2_read(id3, fd, &header);
808 if (rc)
809 goto rc_error;
811 } else if (v2_footer_parse(&header, buf + 128)) {
812 /* footer at end of file */
813 off = lseek(fd, -(header.size + 10), SEEK_END);
814 if (off == -1)
815 goto error;
816 rc = v2_read(id3, fd, &header);
817 if (rc)
818 goto rc_error;
820 return 0;
823 if (flags & ID3_V1) {
824 off = lseek(fd, -128, SEEK_END);
825 if (off == -1)
826 goto error;
827 rc = read_all(fd, id3->v1, 128);
828 if (rc)
829 goto rc_error;
830 id3->has_v1 = is_v1(id3->v1);
832 return 0;
833 error:
834 rc = -1;
835 rc_error:
836 return rc;
839 static char *v1_get_str(const char *buf, int len)
841 char in[32];
842 char *out;
843 int i;
845 for (i = len - 1; i >= 0; i--) {
846 if (buf[i] != 0 && buf[i] != ' ')
847 break;
849 if (i == -1)
850 return NULL;
851 memcpy(in, buf, i + 1);
852 in[i + 1] = 0;
853 if (u_is_valid(in))
854 return xstrdup(in);
855 if (utf8_encode(in, id3_default_charset, &out))
856 return NULL;
857 return out;
860 char *id3_get_comment(ID3 *id3, enum id3_key key)
862 if (id3->has_v2) {
863 if (id3->v2[key])
864 return xstrdup(id3->v2[key]);
866 if (id3->has_v1) {
867 switch (key) {
868 case ID3_ARTIST:
869 return v1_get_str(id3->v1 + 33, 30);
870 case ID3_ALBUM:
871 return v1_get_str(id3->v1 + 63, 30);
872 case ID3_TITLE:
873 return v1_get_str(id3->v1 + 3, 30);
874 case ID3_DATE:
875 return v1_get_str(id3->v1 + 93, 4);
876 case ID3_GENRE:
878 unsigned char idx = id3->v1[127];
880 if (idx >= NR_GENRES)
881 return NULL;
882 return xstrdup(genres[idx]);
884 case ID3_TRACK:
886 char *t;
888 if (id3->v1[125] != 0)
889 return NULL;
890 t = xnew(char, 4);
891 snprintf(t, 4, "%d", ((unsigned char *)id3->v1)[126]);
892 return t;
894 case ID3_DISC:
895 case ID3_ALBUMARTIST:
896 case NUM_ID3_KEYS:
897 return NULL;
900 return NULL;