2 * Copyright 2005 Timo Hirvonen
8 #include "utf8_encode.h"
26 * if v2 is at beginning _and_ at end then there must be a seek tag at beginning
31 char *v2
[NUM_ID3_KEYS
];
33 unsigned int has_v1
: 1;
34 unsigned int has_v2
: 1;
38 unsigned char ver_major
;
39 unsigned char ver_minor
;
44 struct v2_extended_header
{
48 struct v2_frame_header
{
54 #define V2_HEADER_UNSYNC (1 << 7)
55 #define V2_HEADER_EXTENDED (1 << 6)
56 #define V2_HEADER_EXPERIMENTAL (1 << 5)
57 #define V2_HEADER_FOOTER (1 << 4)
59 #define V2_FRAME_COMPRESSED (1 << 3) /* great idea!!1 */
60 #define V2_FRAME_ENCRYPTHED (1 << 2) /* wow, this is very neat! */
61 #define V2_FRAME_UNSYNC (1 << 1)
62 #define V2_FRAME_LEN_INDICATOR (1 << 0)
66 static const char *genres
[NR_GENRES
] = {
203 "Christian Gangsta Rap",
207 "Contemporary Christian",
219 #define id3_debug(...) d_print(__VA_ARGS__)
221 #define id3_debug(...) do { } while (0)
224 static int utf16_is_special(const uchar uch
)
226 if (UTF16_IS_HSURROGATE(uch
) || UTF16_IS_LSURROGATE(uch
) || UTF16_IS_BOM(uch
))
231 static char *utf16_to_utf8(const unsigned char *buf
, int buf_size
)
236 out
= xnew(char, (buf_size
/ 2) * 4 + 1);
238 while (buf_size
- i
>= 2) {
241 u
= buf
[i
] + (buf
[i
+ 1] << 8);
242 if (u_is_unicode(u
)) {
243 if (utf16_is_special(u
) == 0)
244 u_set_char(out
, &idx
, u
);
253 u_set_char(out
, &idx
, 0);
257 static char *utf16be_to_utf8(const unsigned char *buf
, int buf_size
)
262 out
= xnew(char, (buf_size
/ 2) * 4 + 1);
265 while (buf_size
- i
>= 2) {
268 u
= buf
[i
+ 1] + (buf
[i
] << 8);
269 if (u_is_unicode(u
)) {
270 if (utf16_is_special(u
) == 0)
271 u_set_char(out
, &idx
, u
);
280 u_set_char(out
, &idx
, 0);
284 static int is_v1(const char *buf
)
286 return buf
[0] == 'T' && buf
[1] == 'A' && buf
[2] == 'G';
289 static int u32_unsync(const unsigned char *buf
, uint32_t *up
)
294 for (i
= 0; i
< 4; i
++) {
305 static void get_u32(const unsigned char *buf
, uint32_t *up
)
310 for (i
= 0; i
< 4; i
++) {
318 static void get_u24(const unsigned char *buf
, uint32_t *up
)
323 for (i
= 0; i
< 3; i
++) {
331 static int v2_header_footer_parse(struct v2_header
*header
, const char *buf
)
333 const unsigned char *b
= (const unsigned char *)buf
;
335 header
->ver_major
= b
[3];
336 header
->ver_minor
= b
[4];
337 header
->flags
= b
[5];
338 if (header
->ver_major
== 0xff || header
->ver_minor
== 0xff)
340 return u32_unsync(b
+ 6, &header
->size
);
343 static int v2_header_parse(struct v2_header
*header
, const char *buf
)
345 if (buf
[0] != 'I' || buf
[1] != 'D' || buf
[2] != '3')
347 return v2_header_footer_parse(header
, buf
);
350 static int v2_footer_parse(struct v2_header
*header
, const char *buf
)
352 if (buf
[0] != '3' || buf
[1] != 'D' || buf
[2] != 'I')
354 return v2_header_footer_parse(header
, buf
);
357 static int v2_extended_header_parse(struct v2_extended_header
*header
, const char *buf
)
359 return u32_unsync((const unsigned char *)buf
, &header
->size
);
362 static int is_frame_id_char(char ch
)
364 return (ch
>= 'A' && ch
<= 'Z') || (ch
>= '0' && ch
<= '9');
373 * YYY is frame size excluding this 6 byte header
375 static int v2_2_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
379 for (i
= 0; i
< 3; i
++) {
380 if (!is_frame_id_char(buf
[i
]))
382 header
->id
[i
] = buf
[i
];
385 get_u24((const unsigned char *)(buf
+ 3), &header
->size
);
387 if (header
->size
== 0)
389 id3_debug("%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2], header
->size
);
400 * YYYY is frame size excluding this 10 byte header
403 static int v2_3_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
407 for (i
= 0; i
< 4; i
++) {
408 if (!is_frame_id_char(buf
[i
]))
410 header
->id
[i
] = buf
[i
];
412 get_u32((const unsigned char *)(buf
+ 4), &header
->size
);
413 header
->flags
= (buf
[8] << 8) | buf
[9];
414 if (header
->size
== 0)
416 id3_debug("%c%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2],
417 header
->id
[3], header
->size
);
421 /* same as 2.3 but header size is sync safe */
422 static int v2_4_0_frame_header_parse(struct v2_frame_header
*header
, const char *buf
)
426 for (i
= 0; i
< 4; i
++) {
427 if (!is_frame_id_char(buf
[i
]))
429 header
->id
[i
] = buf
[i
];
431 if (!u32_unsync((const unsigned char *)(buf
+ 4), &header
->size
))
433 header
->flags
= (buf
[8] << 8) | buf
[9];
434 if (header
->size
== 0)
436 id3_debug("%c%c%c%c %d\n", header
->id
[0], header
->id
[1], header
->id
[2],
437 header
->id
[3], header
->size
);
441 static int read_all(int fd
, char *buf
, size_t size
)
446 int rc
= read(fd
, buf
+ pos
, size
- pos
);
449 if (errno
== EINTR
|| errno
== EAGAIN
)
458 static char *parse_genre(const char *str
)
464 if (strncasecmp(str
, "(RX", 3) == 0)
465 return xstrdup("Remix");
467 if (strncasecmp(str
, "(CR", 3) == 0)
468 return xstrdup("Cover");
475 idx
= strtol(str
, &end
, 10);
477 /* Number parsed but there may be some crap after the number.
478 * I don't care, ID3v2 by definition contains crap.
480 if (idx
>= 0 && idx
< NR_GENRES
)
481 return xstrdup(genres
[idx
]);
485 const char *ptr
= strchr(str
, ')');
488 /* genre name after random crap in parenthesis,
489 * return the genre name */
490 return xstrdup(ptr
+ 1);
495 /* random crap, just return it and wait for a bug report */
499 /* http://www.id3.org/id3v2.4.0-structure.txt */
505 { "TDRC", ID3_DATE
},
508 { "TPE1", ID3_ARTIST
},
509 { "TALB", ID3_ALBUM
},
510 { "TIT2", ID3_TITLE
},
511 { "TYER", ID3_DATE
},
512 { "TCON", ID3_GENRE
},
513 { "TPOS", ID3_DISC
},
514 { "TRCK", ID3_TRACK
},
515 { "TPE2", ID3_ALBUMARTIST
},
517 /* obsolete frames (2.2.0) */
518 { "TP1", ID3_ARTIST
},
519 { "TAL", ID3_ALBUM
},
520 { "TT2", ID3_TITLE
},
522 { "TCO", ID3_GENRE
},
524 { "TRK", ID3_TRACK
},
529 static void v2_add_frame(ID3
*id3
, struct v2_frame_header
*fh
, const char *buf
)
531 int i
, encoding
= *buf
++, len
= fh
->size
- 1;
536 for (i
= 0; frame_tab
[i
].key
!= -1; i
++) {
537 enum id3_key key
= frame_tab
[i
].key
;
541 if (strncmp(fh
->id
, frame_tab
[i
].name
, 4))
545 case 0x00: /* ISO-8859-1 */
546 in
= xstrndup(buf
, len
);
547 rc
= utf8_encode(in
, id3_default_charset
, &out
);
552 case 0x03: /* UTF-8 */
553 in
= xstrndup(buf
, len
);
554 if (u_is_valid(in
)) {
557 rc
= utf8_encode(in
, id3_default_charset
, &out
);
563 case 0x01: /* UTF-16 */
564 out
= utf16_to_utf8((const unsigned char *)buf
, len
);
568 case 0x02: /* UTF-16BE */
569 out
= utf16be_to_utf8((const unsigned char *)buf
, len
);
574 if (key
== ID3_TRACK
|| key
== ID3_DISC
)
575 fix_track_or_disc(out
);
576 if (key
== ID3_GENRE
) {
579 id3_debug("genre before: '%s'\n", out
);
580 tmp
= parse_genre(out
);
587 id3_debug("%s '%s'\n", frame_tab
[i
].name
, out
);
592 static void unsync(unsigned char *buf
, int *lenp
)
598 while (s
< len
- 1) {
599 if (buf
[s
] == 0xff && buf
[s
+ 1] == 0x00) {
600 /* 0xff 0x00 -> 0xff */
604 if (s
< len
- 2 && buf
[s
] == 0x00) {
605 /* 0xff 0x00 0x00 -> 0xff 0x00 */
616 d_print("unsyncronization removed %d bytes\n", s
- d
);
620 static int v2_read(ID3
*id3
, int fd
, const struct v2_header
*header
)
625 int frame_header_size
;
627 buf_size
= header
->size
;
628 buf
= xnew(char, buf_size
);
629 rc
= read_all(fd
, buf
, buf_size
);
636 if (header
->flags
& V2_HEADER_EXTENDED
) {
637 struct v2_extended_header ext
;
639 v2_extended_header_parse(&ext
, buf
);
640 if (ext
.size
> buf_size
) {
641 id3_debug("extended header corrupted\n");
645 frame_start
= ext
.size
;
646 /* should check if update flag is set */
649 if (header
->flags
& V2_HEADER_UNSYNC
) {
650 int len
= buf_size
- frame_start
;
652 unsync((unsigned char *)(buf
+ frame_start
), &len
);
653 buf_size
= len
+ frame_start
;
656 frame_header_size
= 10;
657 if (header
->ver_major
== 2)
658 frame_header_size
= 6;
661 while (i
< buf_size
- frame_header_size
) {
662 struct v2_frame_header fh
;
665 if (header
->ver_major
== 2) {
666 if (!v2_2_0_frame_header_parse(&fh
, buf
+ i
))
668 } else if (header
->ver_major
== 3) {
669 if (!v2_3_0_frame_header_parse(&fh
, buf
+ i
))
673 if (!v2_4_0_frame_header_parse(&fh
, buf
+ i
))
677 i
+= frame_header_size
;
678 if (fh
.size
> buf_size
- i
) {
679 id3_debug("frame too big\n");
684 if (fh
.flags
& V2_FRAME_UNSYNC
) {
687 unsync((unsigned char *)(buf
+ i
), &tmp
);
690 v2_add_frame(id3
, &fh
, buf
+ i
);
698 int id3_tag_size(const char *buf
, int buf_size
)
700 struct v2_header header
;
704 if (v2_header_parse(&header
, buf
)) {
705 if (header
.flags
& V2_HEADER_FOOTER
) {
706 /* header + data + footer */
707 id3_debug("v2.%d.%d with footer\n", header
.ver_major
, header
.ver_minor
);
708 return 10 + header
.size
+ 10;
711 id3_debug("v2.%d.%d\n", header
.ver_major
, header
.ver_minor
);
712 return 10 + header
.size
;
714 if (buf_size
>= 3 && is_v1(buf
)) {
723 return xnew0(ID3
, 1);
726 void id3_free(ID3
*id3
)
730 for (i
= 0; i
< NUM_ID3_KEYS
; i
++)
735 int id3_read_tags(ID3
*id3
, int fd
, unsigned int flags
)
740 if (flags
& ID3_V2
) {
741 struct v2_header header
;
744 rc
= read_all(fd
, buf
, 10);
747 if (v2_header_parse(&header
, buf
)) {
748 rc
= v2_read(id3
, fd
, &header
);
751 /* get v1 if needed */
753 /* get v2 from end and optionally v1 */
755 off
= lseek(fd
, -138, SEEK_END
);
758 rc
= read_all(fd
, buf
, 138);
762 if (is_v1(buf
+ 10)) {
763 if (flags
& ID3_V1
) {
764 memcpy(id3
->v1
, buf
+ 10, 128);
767 if (v2_footer_parse(&header
, buf
)) {
768 /* footer at end of file - 128 */
769 off
= lseek(fd
, -(header
.size
+ 138), SEEK_END
);
772 rc
= v2_read(id3
, fd
, &header
);
776 } else if (v2_footer_parse(&header
, buf
+ 128)) {
777 /* footer at end of file */
778 off
= lseek(fd
, -(header
.size
+ 10), SEEK_END
);
781 rc
= v2_read(id3
, fd
, &header
);
788 if (flags
& ID3_V1
) {
789 off
= lseek(fd
, -128, SEEK_END
);
792 rc
= read_all(fd
, id3
->v1
, 128);
795 id3
->has_v1
= is_v1(id3
->v1
);
804 static char *v1_get_str(const char *buf
, int len
)
810 for (i
= len
- 1; i
>= 0; i
--) {
811 if (buf
[i
] != 0 && buf
[i
] != ' ')
816 memcpy(in
, buf
, i
+ 1);
820 if (utf8_encode(in
, id3_default_charset
, &out
))
825 char *id3_get_comment(ID3
*id3
, enum id3_key key
)
829 return xstrdup(id3
->v2
[key
]);
834 return v1_get_str(id3
->v1
+ 33, 30);
836 return v1_get_str(id3
->v1
+ 63, 30);
838 return v1_get_str(id3
->v1
+ 3, 30);
840 return v1_get_str(id3
->v1
+ 93, 4);
843 unsigned char idx
= id3
->v1
[127];
845 if (idx
>= NR_GENRES
)
847 return xstrdup(genres
[idx
]);
853 if (id3
->v1
[125] != 0)
856 snprintf(t
, 4, "%d", ((unsigned char *)id3
->v1
)[126]);
860 case ID3_ALBUMARTIST
: