2 * Copyright 2005 Timo Hirvonen
8 #include "utf8_encode.h"
26 * if v2 is at beginning _and_ at end then there must be a seek tag at beginning
30 unsigned char ver_major
;
31 unsigned char ver_minor
;
36 struct v2_extended_header
{
40 struct v2_frame_header
{
46 #define V2_HEADER_UNSYNC (1 << 7)
47 #define V2_HEADER_EXTENDED (1 << 6)
48 #define V2_HEADER_EXPERIMENTAL (1 << 5)
49 #define V2_HEADER_FOOTER (1 << 4)
51 #define V2_FRAME_COMPRESSED (1 << 3) /* great idea!!1 */
52 #define V2_FRAME_ENCRYPTHED (1 << 2) /* wow, this is very neat! */
53 #define V2_FRAME_UNSYNC (1 << 1)
54 #define V2_FRAME_LEN_INDICATOR (1 << 0)
58 static const char *genres
[NR_GENRES
] = {
195 "Christian Gangsta Rap",
199 "Contemporary Christian",
211 #define id3_debug(...) d_print(__VA_ARGS__)
213 #define id3_debug(...) do { } while (0)
216 const char * const id3_key_names
[NUM_ID3_KEYS
] = {
228 "replaygain_track_gain",
229 "replaygain_track_peak",
230 "replaygain_album_gain",
231 "replaygain_album_peak",
235 static int utf16_is_special(const uchar uch
)
237 if (UTF16_IS_HSURROGATE(uch
) || UTF16_IS_LSURROGATE(uch
) || UTF16_IS_BOM(uch
))
242 static char *utf16_to_utf8(const unsigned char *buf
, int buf_size
)
247 out
= xnew(char, (buf_size
/ 2) * 4 + 1);
249 while (buf_size
- i
>= 2) {
252 u
= buf
[i
] + (buf
[i
+ 1] << 8);
253 if (u_is_unicode(u
)) {
254 if (utf16_is_special(u
) == 0)
255 u_set_char(out
, &idx
, u
);
264 u_set_char(out
, &idx
, 0);
268 static char *utf16be_to_utf8(const unsigned char *buf
, int buf_size
)
273 out
= xnew(char, (buf_size
/ 2) * 4 + 1);
276 while (buf_size
- i
>= 2) {
279 u
= buf
[i
+ 1] + (buf
[i
] << 8);
280 if (u_is_unicode(u
)) {
281 if (utf16_is_special(u
) == 0)
282 u_set_char(out
, &idx
, u
);
291 u_set_char(out
, &idx
, 0);
295 static int is_v1(const char *buf
)
297 return buf
[0] == 'T' && buf
[1] == 'A' && buf
[2] == 'G';
300 static int u32_unsync(const unsigned char *buf
, uint32_t *up
)
305 for (i
= 0; i
< 4; i
++) {
316 static void get_u32(const unsigned char *buf
, uint32_t *up
)
321 for (i
= 0; i
< 4; i
++) {
329 static void get_u24(const unsigned char *buf
, uint32_t *up
)
334 for (i
= 0; i
< 3; i
++) {
342 static int v2_header_footer_parse(struct v2_header
*header
, const char *buf
)
344 const unsigned char *b
= (const unsigned char *)buf
;
346 header
->ver_major
= b
[3];
347 header
->ver_minor
= b
[4];
348 header
->flags
= b
[5];
349 if (header
->ver_major
== 0xff || header
->ver_minor
== 0xff)
351 return u32_unsync(b
+ 6, &header
->size
);
354 static int v2_header_parse(struct v2_header
*header
, const char *buf
)
356 if (buf
[0] != 'I' || buf
[1] != 'D' || buf
[2] != '3')
358 return v2_header_footer_parse(header
, buf
);
361 static int v2_footer_parse(struct v2_header
*header
, const char *buf
)
363 if (buf
[0] != '3' || buf
[1] != 'D' || buf
[2] != 'I')
365 return v2_header_footer_parse(header
, buf
);
368 static int v2_extended_header_parse(struct v2_extended_header
*header
, const char *buf
)
370 return u32_unsync((const unsigned char *)buf
, &header
->size
);
373 static int is_frame_id_char(char ch
)
375 return (ch
>= 'A' && ch
<= 'Z') || (ch
>= '0' && ch
<= '9');
384 * YYY is frame size excluding this 6 byte header
386 static int v2_2_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
390 for (i
= 0; i
< 3; i
++) {
391 if (!is_frame_id_char(buf
[i
]))
393 header
->id
[i
] = buf
[i
];
396 get_u24((const unsigned char *)(buf
+ 3), &header
->size
);
398 if (header
->size
== 0)
400 id3_debug("%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2], header
->size
);
411 * YYYY is frame size excluding this 10 byte header
414 static int v2_3_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
418 for (i
= 0; i
< 4; i
++) {
419 if (!is_frame_id_char(buf
[i
]))
421 header
->id
[i
] = buf
[i
];
423 get_u32((const unsigned char *)(buf
+ 4), &header
->size
);
424 header
->flags
= (buf
[8] << 8) | buf
[9];
425 if (header
->size
== 0)
427 id3_debug("%c%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2],
428 header
->id
[3], header
->size
);
432 /* same as 2.3 but header size is sync safe */
433 static int v2_4_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
437 for (i
= 0; i
< 4; i
++) {
438 if (!is_frame_id_char(buf
[i
]))
440 header
->id
[i
] = buf
[i
];
442 if (!u32_unsync((const unsigned char *)(buf
+ 4), &header
->size
))
444 header
->flags
= (buf
[8] << 8) | buf
[9];
445 if (header
->size
== 0)
447 id3_debug("%c%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2],
448 header
->id
[3], header
->size
);
452 static int read_all(int fd
, char *buf
, size_t size
)
457 int rc
= read(fd
, buf
+ pos
, size
- pos
);
460 if (errno
== EINTR
|| errno
== EAGAIN
)
469 static char *parse_genre(const char *str
)
475 if (strncasecmp(str
, "(RX", 3) == 0)
476 return xstrdup("Remix");
478 if (strncasecmp(str
, "(CR", 3) == 0)
479 return xstrdup("Cover");
486 idx
= strtol(str
, &end
, 10);
488 /* Number parsed but there may be some crap after the number.
489 * I don't care, ID3v2 by definition contains crap.
491 if (idx
>= 0 && idx
< NR_GENRES
)
492 return xstrdup(genres
[idx
]);
496 const char *ptr
= strchr(str
, ')');
499 /* genre name after random crap in parenthesis,
500 * return the genre name */
501 return xstrdup(ptr
+ 1);
506 /* random crap, just return it and wait for a bug report */
510 /* http://www.id3.org/id3v2.4.0-structure.txt */
516 { "TDRC", ID3_DATE
}, // recording date
517 { "TDRL", ID3_DATE
}, // release date
518 { "TDOR", ID3_DATE
}, // original release date
519 { "TSOP", ID3_ARTISTSORT
},
522 { "TPE1", ID3_ARTIST
},
523 { "TALB", ID3_ALBUM
},
524 { "TIT2", ID3_TITLE
},
525 { "TYER", ID3_DATE
},
526 { "TCON", ID3_GENRE
},
527 { "TPOS", ID3_DISC
},
528 { "TRCK", ID3_TRACK
},
529 { "TPE2", ID3_ALBUMARTIST
},
530 { "XSOP", ID3_ARTISTSORT
}, // obsolete
531 { "TCMP", ID3_COMPILATION
},
533 /* obsolete frames (2.2.0) */
534 { "TP1", ID3_ARTIST
},
535 { "TAL", ID3_ALBUM
},
536 { "TT2", ID3_TITLE
},
538 { "TCO", ID3_GENRE
},
540 { "TRK", ID3_TRACK
},
545 static int frame_tab_index(const char *id
)
549 for (i
= 0; frame_tab
[i
].key
!= -1; i
++) {
550 if (!strncmp(id
, frame_tab
[i
].name
, 4))
556 static void fix_date(char *buf
)
558 const char *ptr
= buf
;
563 if (ch
>= '0' && ch
<= '9') {
568 // number which length is 4, must be year
569 memmove(buf
, ptr
- 5, 4);
578 static char *decode_str(const char *buf
, int len
, int encoding
)
580 char *in
, *out
= NULL
;
584 case 0x00: /* ISO-8859-1 */
585 in
= xstrndup(buf
, len
);
586 rc
= utf8_encode(in
, id3_default_charset
, &out
);
589 case 0x03: /* UTF-8 */
590 in
= xstrndup(buf
, len
);
591 if (u_is_valid(in
)) {
594 rc
= utf8_encode(in
, id3_default_charset
, &out
);
598 case 0x01: /* UTF-16 */
599 out
= utf16_to_utf8((const unsigned char *)buf
, len
);
601 case 0x02: /* UTF-16BE */
602 out
= utf16be_to_utf8((const unsigned char *)buf
, len
);
608 static void add_v2(struct id3tag
*id3
, enum id3_key key
, char *value
)
611 id3
->v2
[key
] = value
;
615 static void decode_normal(struct id3tag
*id3
, const char *buf
, int len
, int encoding
, enum id3_key key
)
617 char *out
= decode_str(buf
, len
, encoding
);
622 if (key
== ID3_GENRE
) {
625 id3_debug("genre before: '%s'\n", out
);
626 tmp
= parse_genre(out
);
630 if (key
== ID3_DATE
) {
631 id3_debug("date before: '%s'\n", out
);
634 id3_debug("date parsing failed\n");
639 add_v2(id3
, key
, out
);
642 static void decode_txxx(struct id3tag
*id3
, const char *buf
, int len
, int encoding
)
644 enum id3_key key
= NUM_ID3_KEYS
;
648 out
= decode_str(buf
, len
, encoding
);
652 id3_debug("TXXX, key = '%s'\n", out
);
653 if (!strcasecmp(out
, "replaygain_track_gain"))
654 key
= ID3_RG_TRACK_GAIN
;
655 if (!strcasecmp(out
, "replaygain_track_peak"))
656 key
= ID3_RG_TRACK_PEAK
;
657 if (!strcasecmp(out
, "replaygain_album_gain"))
658 key
= ID3_RG_ALBUM_GAIN
;
659 if (!strcasecmp(out
, "replaygain_album_peak"))
660 key
= ID3_RG_ALBUM_PEAK
;
661 if (!strcasecmp(out
, "album artist"))
662 key
= ID3_ALBUMARTIST
;
663 if (!strcasecmp(out
, "albumartistsort"))
664 key
= ID3_ALBUMARTISTSORT
;
665 if (!strcasecmp(out
, "compilation"))
666 key
= ID3_COMPILATION
;
668 size
= strlen(out
) + 1;
671 if (key
== NUM_ID3_KEYS
)
679 out
= decode_str(buf
, len
, encoding
);
683 add_v2(id3
, key
, out
);
686 static void decode_comment(struct id3tag
*id3
, const char *buf
, int len
, int encoding
)
697 out
= decode_str(buf
, len
, encoding
);
701 add_v2(id3
, ID3_COMMENT
, out
);
704 static void v2_add_frame(struct id3tag
*id3
, struct v2_frame_header
*fh
, const char *buf
)
706 int encoding
= *buf
++;
707 int len
= fh
->size
- 1;
713 idx
= frame_tab_index(fh
->id
);
715 decode_normal(id3
, buf
, len
, encoding
, frame_tab
[idx
].key
);
716 } else if (!strncmp(fh
->id
, "TXXX", 4)) {
717 decode_txxx(id3
, buf
, len
, encoding
);
718 } else if (!strncmp(fh
->id
, "COMM", 4)) {
719 decode_comment(id3
, buf
, len
, encoding
);
720 } else if (!strncmp(fh
->id
, "COM", 4)) {
721 decode_comment(id3
, buf
, len
, encoding
);
725 static void unsync(unsigned char *buf
, int *lenp
)
731 while (s
< len
- 1) {
732 if (buf
[s
] == 0xff && buf
[s
+ 1] == 0x00) {
733 /* 0xff 0x00 -> 0xff */
737 if (s
< len
- 2 && buf
[s
] == 0x00) {
738 /* 0xff 0x00 0x00 -> 0xff 0x00 */
749 d_print("unsyncronization removed %d bytes\n", s
- d
);
753 static int v2_read(struct id3tag
*id3
, int fd
, const struct v2_header
*header
)
758 int frame_header_size
;
760 buf_size
= header
->size
;
761 buf
= xnew(char, buf_size
);
762 rc
= read_all(fd
, buf
, buf_size
);
769 if (header
->flags
& V2_HEADER_EXTENDED
) {
770 struct v2_extended_header ext
;
772 if (!v2_extended_header_parse(&ext
, buf
) || ext
.size
> buf_size
) {
773 id3_debug("extended header corrupted\n");
777 frame_start
= ext
.size
;
778 /* should check if update flag is set */
781 if (header
->flags
& V2_HEADER_UNSYNC
) {
782 int len
= buf_size
- frame_start
;
784 unsync((unsigned char *)(buf
+ frame_start
), &len
);
785 buf_size
= len
+ frame_start
;
788 frame_header_size
= 10;
789 if (header
->ver_major
== 2)
790 frame_header_size
= 6;
793 while (i
< buf_size
- frame_header_size
) {
794 struct v2_frame_header fh
;
797 if (header
->ver_major
== 2) {
798 if (!v2_2_0_frame_header_parse(&fh
, buf
+ i
))
800 } else if (header
->ver_major
== 3) {
801 if (!v2_3_0_frame_header_parse(&fh
, buf
+ i
))
805 if (!v2_4_0_frame_header_parse(&fh
, buf
+ i
))
809 i
+= frame_header_size
;
810 if (fh
.size
> buf_size
- i
) {
811 id3_debug("frame too big\n");
816 if (fh
.flags
& V2_FRAME_UNSYNC
) {
819 unsync((unsigned char *)(buf
+ i
), &tmp
);
822 v2_add_frame(id3
, &fh
, buf
+ i
);
830 int id3_tag_size(const char *buf
, int buf_size
)
832 struct v2_header header
;
836 if (v2_header_parse(&header
, buf
)) {
837 if (header
.flags
& V2_HEADER_FOOTER
) {
838 /* header + data + footer */
839 id3_debug("v2.%d.%d with footer\n", header
.ver_major
, header
.ver_minor
);
840 return 10 + header
.size
+ 10;
843 id3_debug("v2.%d.%d\n", header
.ver_major
, header
.ver_minor
);
844 return 10 + header
.size
;
846 if (buf_size
>= 3 && is_v1(buf
)) {
853 void id3_free(struct id3tag
*id3
)
857 for (i
= 0; i
< NUM_ID3_KEYS
; i
++)
861 int id3_read_tags(struct id3tag
*id3
, int fd
, unsigned int flags
)
866 if (flags
& ID3_V2
) {
867 struct v2_header header
;
870 rc
= read_all(fd
, buf
, 10);
873 if (v2_header_parse(&header
, buf
)) {
874 rc
= v2_read(id3
, fd
, &header
);
877 /* get v1 if needed */
879 /* get v2 from end and optionally v1 */
881 off
= lseek(fd
, -138, SEEK_END
);
884 rc
= read_all(fd
, buf
, 138);
888 if (is_v1(buf
+ 10)) {
889 if (flags
& ID3_V1
) {
890 memcpy(id3
->v1
, buf
+ 10, 128);
893 if (v2_footer_parse(&header
, buf
)) {
894 /* footer at end of file - 128 */
895 off
= lseek(fd
, -(header
.size
+ 138), SEEK_END
);
898 rc
= v2_read(id3
, fd
, &header
);
902 } else if (v2_footer_parse(&header
, buf
+ 128)) {
903 /* footer at end of file */
904 off
= lseek(fd
, -(header
.size
+ 10), SEEK_END
);
907 rc
= v2_read(id3
, fd
, &header
);
914 if (flags
& ID3_V1
) {
915 off
= lseek(fd
, -128, SEEK_END
);
918 rc
= read_all(fd
, id3
->v1
, 128);
921 id3
->has_v1
= is_v1(id3
->v1
);
930 static char *v1_get_str(const char *buf
, int len
)
936 for (i
= len
- 1; i
>= 0; i
--) {
937 if (buf
[i
] != 0 && buf
[i
] != ' ')
942 memcpy(in
, buf
, i
+ 1);
946 if (utf8_encode(in
, id3_default_charset
, &out
))
951 char *id3_get_comment(struct id3tag
*id3
, enum id3_key key
)
955 return xstrdup(id3
->v2
[key
]);
960 return v1_get_str(id3
->v1
+ 33, 30);
962 return v1_get_str(id3
->v1
+ 63, 30);
964 return v1_get_str(id3
->v1
+ 3, 30);
966 return v1_get_str(id3
->v1
+ 93, 4);
969 unsigned char idx
= id3
->v1
[127];
971 if (idx
>= NR_GENRES
)
973 return xstrdup(genres
[idx
]);
979 if (id3
->v1
[125] != 0)
982 snprintf(t
, 4, "%d", ((unsigned char *)id3
->v1
)[126]);