demux: es: fix swab usage
[vlc.git] / modules / demux / xiph_metadata.c
blob2b9985ca2fc55e57747e81e353f006466e995e63
1 /*****************************************************************************
2 * xiph_metadata.c: Vorbis Comment parser
3 *****************************************************************************
4 * Copyright © 2008-2013 VLC authors and VideoLAN
5 * $Id$
7 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
8 * Jean-Baptiste Kempf <jb@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 #ifdef HAVE_CONFIG_H
26 # include "config.h"
27 #endif
29 #include <assert.h>
31 #include <vlc_common.h>
32 #include <vlc_charset.h>
33 #include <vlc_strings.h>
34 #include <vlc_arrays.h>
35 #include <vlc_input.h>
36 #include "xiph_metadata.h"
38 input_attachment_t* ParseFlacPicture( const uint8_t *p_data, size_t size,
39 int i_attachments, int *i_cover_score, int *i_cover_idx )
41 /* TODO: Merge with ID3v2 copy in modules/meta_engine/taglib.cpp. */
42 static const char pi_cover_score[] = {
43 0, /* Other */
44 5, /* 32x32 PNG image that should be used as the file icon */
45 4, /* File icon of a different size or format. */
46 20, /* Front cover image of the album. */
47 19, /* Back cover image of the album. */
48 13, /* Inside leaflet page of the album. */
49 18, /* Image from the album itself. */
50 17, /* Picture of the lead artist or soloist. */
51 16, /* Picture of the artist or performer. */
52 14, /* Picture of the conductor. */
53 15, /* Picture of the band or orchestra. */
54 9, /* Picture of the composer. */
55 8, /* Picture of the lyricist or text writer. */
56 7, /* Picture of the recording location or studio. */
57 10, /* Picture of the artists during recording. */
58 11, /* Picture of the artists during performance. */
59 6, /* Picture from a movie or video related to the track. */
60 1, /* Picture of a large, coloured fish. */
61 12, /* Illustration related to the track. */
62 3, /* Logo of the band or performer. */
63 2 /* Logo of the publisher (record company). */
66 uint32_t type, len;
68 if( size < 8 )
69 return NULL;
70 #define RM(x) \
71 do { \
72 assert(size >= (x)); \
73 size -= (x); \
74 p_data += (x); \
75 } while (0)
77 type = GetDWBE( p_data );
78 RM(4);
79 len = GetDWBE( p_data );
80 RM(4);
82 if( size < len )
83 return NULL;
85 char *mime = strndup( (const char *)p_data, len );
86 if( unlikely(mime == NULL) )
87 return NULL;
88 RM(len);
90 if( size < 4 )
92 free( mime );
93 return NULL;
96 len = GetDWBE( p_data );
97 RM(4);
99 if( size < len )
101 free( mime );
102 return NULL;
105 input_attachment_t *p_attachment = NULL;
106 char *description = strndup( (const char *)p_data, len );
107 if( unlikely(description == NULL) )
108 goto error;
109 RM(len);
111 EnsureUTF8( description );
113 if( size < 20 )
114 goto error;
116 RM(4 * 4); /* skip */
118 len = GetDWBE( p_data );
119 RM(4);
121 if( size < len )
122 goto error;
124 /* printf( "Picture type=%"PRIu32" mime=%s description='%s' "
125 "file length=%zu\n", type, mime, description, len ); */
127 char name[7 + (sizeof (i_attachments) * 3) + 4 + 1];
129 snprintf( name, sizeof (name), "picture%u", i_attachments );
131 if( !strcasecmp( mime, "image/jpeg" ) )
132 strcat( name, ".jpg" );
133 else if( !strcasecmp( mime, "image/png" ) )
134 strcat( name, ".png" );
136 p_attachment = vlc_input_attachment_New( name, mime, description, p_data,
137 size /* XXX: len instead? */ );
139 if( type < ARRAY_SIZE(pi_cover_score) &&
140 *i_cover_score < pi_cover_score[type] )
142 *i_cover_idx = i_attachments;
143 *i_cover_score = pi_cover_score[type];
146 error:
147 free( mime );
148 free( description );
149 return p_attachment;
/* Replace any earlier RM() with a variant that works on i_data/p_data.
 * It consumes x bytes without checking that they are available; callers
 * must validate the length first. */
#undef RM
#define RM(x) do { i_data -= (x); p_data += (x); } while (0)
160 typedef struct chapters_array_t
162 unsigned int i_size;
163 seekpoint_t ** pp_chapters;
164 } chapters_array_t;
166 static seekpoint_t * getChapterEntry( unsigned int i_index, chapters_array_t *p_array )
168 if ( i_index > 4096 ) return NULL;
169 if ( i_index >= p_array->i_size )
171 unsigned int i_newsize = p_array->i_size;
172 while( i_index >= i_newsize ) i_newsize += 50;
174 if ( !p_array->pp_chapters )
176 p_array->pp_chapters = calloc( i_newsize, sizeof( seekpoint_t * ) );
177 if ( !p_array->pp_chapters ) return NULL;
178 p_array->i_size = i_newsize;
179 } else {
180 seekpoint_t **tmp = calloc( i_newsize, sizeof( seekpoint_t * ) );
181 if ( !tmp ) return NULL;
182 memcpy( tmp, p_array->pp_chapters, p_array->i_size * sizeof( seekpoint_t * ) );
183 free( p_array->pp_chapters );
184 p_array->pp_chapters = tmp;
185 p_array->i_size = i_newsize;
188 if ( !p_array->pp_chapters[i_index] )
189 p_array->pp_chapters[i_index] = vlc_seekpoint_New();
190 return p_array->pp_chapters[i_index];
/* Bit flags, one per metadata field, used to remember which fields have
 * already been filled in while parsing. */
enum
{
    XIPHMETA_Title       = 1 << 0,
    XIPHMETA_Artist      = 1 << 1,
    XIPHMETA_Genre       = 1 << 2,
    XIPHMETA_Copyright   = 1 << 3,
    XIPHMETA_Album       = 1 << 4,
    XIPHMETA_TrackNum    = 1 << 5,
    XIPHMETA_Description = 1 << 6,
    XIPHMETA_Rating      = 1 << 7,
    XIPHMETA_Date        = 1 << 8,
    XIPHMETA_Language    = 1 << 9,
    XIPHMETA_Publisher   = 1 << 10,
    XIPHMETA_EncodedBy   = 1 << 11,
    XIPHMETA_TrackTotal  = 1 << 12
};
/**
 * Extracts the value that follows a tag at the start of a cue sheet line.
 *
 * \param psz_line  NUL-terminated line
 * \param psz_tag   tag expected at the very start of the line
 *                  (compared case-insensitively)
 * \param i_tag     length of psz_tag in bytes
 * \param b_quoted  when true, the value is unquoted: a backslash makes the
 *                  next character literal, and unescaped double quotes are
 *                  dropped; when false the rest of the line is copied as is
 * \return a newly allocated string the caller must free(), or NULL when the
 *         line does not start with the tag or the allocation failed
 */
static char * xiph_ExtractCueSheetMeta( const char *psz_line,
                                        const char *psz_tag, int i_tag,
                                        bool b_quoted )
{
    if( strncasecmp( psz_line, psz_tag, i_tag ) != 0 )
        return NULL;

    const char *psz_in = &psz_line[i_tag];
    if( !b_quoted )
        return strdup( psz_in );

    /* Unquoting can only shrink the value, so its length is enough. */
    char *psz_value = malloc( strlen( psz_in ) + 1 );
    if( psz_value == NULL )
        return NULL;

    char *psz_out = psz_value;
    bool b_escaped = false;
    for( ; *psz_in != '\0'; psz_in++ )
    {
        if( b_escaped )
        {
            /* The escape applies to exactly one character, whatever it is.
             * It must not stay armed, or a later closing quote would be
             * copied into the value. */
            *psz_out++ = *psz_in;
            b_escaped = false;
        }
        else if( *psz_in == '\\' )
            b_escaped = true;
        else if( *psz_in != '"' )
            *psz_out++ = *psz_in;
    }
    *psz_out = '\0';
    return psz_value;
}
258 static void xiph_ParseCueSheetMeta( unsigned *pi_flags, vlc_meta_t *p_meta,
259 const char *psz_line,
260 int *pi_seekpoint, seekpoint_t ***ppp_seekpoint,
261 seekpoint_t **pp_tmppoint, bool *pb_valid )
263 VLC_UNUSED(pi_seekpoint);
264 VLC_UNUSED(ppp_seekpoint);
266 seekpoint_t *p_seekpoint = *pp_tmppoint;
267 char *psz_string;
269 #define TRY_EXTRACT_CUEMETA(var, string, quoted) \
270 if( !(*pi_flags & XIPHMETA_##var) &&\
271 ( psz_string = xiph_ExtractCueSheetMeta( psz_line, string, sizeof(string) - 1, quoted ) ) )\
273 vlc_meta_Set( p_meta, vlc_meta_##var, psz_string );\
274 free( psz_string );\
275 *pi_flags |= XIPHMETA_##var;\
278 TRY_EXTRACT_CUEMETA(Title, "TITLE \"", true)
279 else TRY_EXTRACT_CUEMETA(Genre, "REM GENRE ", false)
280 else TRY_EXTRACT_CUEMETA(Date, "REM DATE ", false)
281 else TRY_EXTRACT_CUEMETA(Artist, "PERFORMER \"", true)
282 else if( !strncasecmp( psz_line, " TRACK ", 8 ) )
284 if( p_seekpoint )
286 if( *pb_valid )
287 TAB_APPEND( *pi_seekpoint, *ppp_seekpoint, p_seekpoint );
288 else
289 vlc_seekpoint_Delete( p_seekpoint );
290 *pb_valid = false;
292 *pp_tmppoint = p_seekpoint = vlc_seekpoint_New();
294 else if( p_seekpoint && !strncasecmp( psz_line, " INDEX 01 ", 13 ) )
296 unsigned m, s, f;
297 if( sscanf( &psz_line[13], "%u:%u:%u", &m, &s, &f ) == 3 )
299 p_seekpoint->i_time_offset = vlc_tick_from_sec(m * 60 + s) + vlc_tick_from_samples(f, 75);
300 *pb_valid = true;
303 else if( p_seekpoint && !p_seekpoint->psz_name )
305 p_seekpoint->psz_name = xiph_ExtractCueSheetMeta( psz_line, " TITLE \"", 11, true );
309 static void xiph_ParseCueSheet( unsigned *pi_flags, vlc_meta_t *p_meta,
310 const char *p_data, int i_data,
311 int *pi_seekpoint, seekpoint_t ***ppp_seekpoint )
313 seekpoint_t *p_seekpoint = NULL;
314 bool b_valid = false;
316 const char *p_head = p_data;
317 const char *p_tail = p_head;
318 while( p_tail < p_data + i_data )
320 if( *p_tail == 0x0D )
322 char *psz = strndup( p_head, p_tail - p_head );
323 if( psz )
325 xiph_ParseCueSheetMeta( pi_flags, p_meta, psz,
326 pi_seekpoint, ppp_seekpoint,
327 &p_seekpoint, &b_valid );
328 free( psz );
330 if( *(++p_tail) == 0x0A )
331 p_tail++;
332 p_head = p_tail;
334 else
336 p_tail++;
341 if( p_seekpoint )
343 if( b_valid )
344 TAB_APPEND( *pi_seekpoint, *ppp_seekpoint, p_seekpoint );
345 else
346 vlc_seekpoint_Delete( p_seekpoint );
/**
 * Parses a Vorbis comment block and stores what it finds in the caller's
 * metadata, attachment, seekpoint and replay gain containers.
 *
 * The block is read as: a 32-bit vendor length (GetDWLE) followed by that
 * many vendor bytes, which are skipped; a 32-bit comment count; then, for
 * each comment, a 32-bit length followed by that many bytes of "TAG=value"
 * text. Parsing stops silently as soon as a length does not fit in the
 * remaining data.
 *
 * p_fmt:           when non-NULL, its psz_language is replaced by the value
 *                  of a LANGUAGE= comment.
 * pp_meta:         if *pp_meta is NULL, a new object is created with
 *                  vlc_meta_New() and stored there.
 * p_data, i_data:  the comment block and its size in bytes.
 * i_attachments, attachments:
 *                  receive pictures from METADATA_BLOCK_PICTURE= comments;
 *                  those comments are skipped when attachments is NULL.
 * i_cover_score, i_cover_idx:
 *                  forwarded to ParseFlacPicture().
 * i_seekpoint, ppp_seekpoint:
 *                  receive seekpoints from cuesheet= and CHAPTERxx comments.
 *                  They are used without a NULL check.
 * ppf_replay_gain, ppf_replay_peak:
 *                  receive REPLAYGAIN_* values; that branch is only taken
 *                  when both pointers are non-NULL.
 */
350 void vorbis_ParseComment( es_format_t *p_fmt, vlc_meta_t **pp_meta,
351 const uint8_t *p_data, size_t i_data,
352 int *i_attachments, input_attachment_t ***attachments,
353 int *i_cover_score, int *i_cover_idx,
354 int *i_seekpoint, seekpoint_t ***ppp_seekpoint,
355 float (* ppf_replay_gain)[AUDIO_REPLAY_GAIN_MAX],
356 float (* ppf_replay_peak)[AUDIO_REPLAY_GAIN_MAX] )
358 if( i_data < 8 )
359 return;
361 uint32_t vendor_length = GetDWLE(p_data); RM(4);
363 if( vendor_length > i_data )
364 return; /* invalid length */
366 RM(vendor_length); /* TODO: handle vendor payload */
368 if( i_data < 4 )
369 return;
371 uint32_t i_comment = GetDWLE(p_data); RM(4);
373 if( i_comment > i_data || i_comment == 0 )
374 return; /* invalid length */
376 /* */
377 vlc_meta_t *p_meta = *pp_meta;
378 if( !p_meta )
379 *pp_meta = p_meta = vlc_meta_New();
381 if( unlikely( !p_meta ) )
382 return;
384 /* */
/* Fields already set from this block (XIPHMETA_* bits). */
385 unsigned hasMetaFlags = 0;
/* Chapter names and times come in separate comments, so entries are
 * collected by chapter index and appended once all comments are read. */
387 chapters_array_t chapters_array = { 0, NULL };
/* One iteration per announced comment; ends early when fewer than 4 bytes
 * remain or a comment length exceeds the remaining data. */
389 for( ; i_comment > 0 && i_data >= 4; i_comment-- )
391 uint32_t comment_size = GetDWLE(p_data); RM(4);
393 if( comment_size > i_data )
394 break;
396 if( comment_size == 0 )
397 continue;
/* Work on a NUL-terminated private copy so it can be modified in place. */
399 char* psz_comment = malloc( comment_size + 1 );
401 if( unlikely( !psz_comment ) )
402 goto next_comment;
404 memcpy( psz_comment, p_data, comment_size );
405 psz_comment[comment_size] = '\0';
407 EnsureUTF8( psz_comment );
/* IF_EXTRACT: on a case-insensitive tag match, set the meta field; if this
 * block has already set that field, the new value is appended after a comma
 * instead of replacing it.
 * IF_EXTRACT_ONCE: only the first matching comment is kept.
 * IF_EXTRACT_FMT: same as IF_EXTRACT, and additionally stores a copy of the
 * value in fmt->target when fmt is non-NULL. */
409 #define IF_EXTRACT(txt,var) \
410 if( !strncasecmp(psz_comment, txt, strlen(txt)) ) \
412 const char *oldval = vlc_meta_Get( p_meta, vlc_meta_ ## var ); \
413 if( oldval && (hasMetaFlags & XIPHMETA_##var)) \
415 char * newval; \
416 if( asprintf( &newval, "%s,%s", oldval, &psz_comment[strlen(txt)] ) == -1 ) \
417 newval = NULL; \
418 vlc_meta_Set( p_meta, vlc_meta_ ## var, newval ); \
419 free( newval ); \
421 else \
422 vlc_meta_Set( p_meta, vlc_meta_ ## var, &psz_comment[strlen(txt)] ); \
423 hasMetaFlags |= XIPHMETA_##var; \
426 #define IF_EXTRACT_ONCE(txt,var) \
427 if( !strncasecmp(psz_comment, txt, strlen(txt)) && !(hasMetaFlags & XIPHMETA_##var) ) \
429 vlc_meta_Set( p_meta, vlc_meta_ ## var, &psz_comment[strlen(txt)] ); \
430 hasMetaFlags |= XIPHMETA_##var; \
433 #define IF_EXTRACT_FMT(txt,var,fmt,target) \
434 if( !strncasecmp(psz_comment, txt, strlen(txt)) ) \
436 IF_EXTRACT(txt,var)\
437 if( fmt )\
439 free( fmt->target );\
440 fmt->target = strdup(&psz_comment[strlen(txt)]);\
444 IF_EXTRACT("TITLE=", Title )
445 else IF_EXTRACT("ARTIST=", Artist )
446 else IF_EXTRACT("GENRE=", Genre )
447 else IF_EXTRACT("COPYRIGHT=", Copyright )
448 else IF_EXTRACT("ALBUM=", Album )
449 else if( !(hasMetaFlags & XIPHMETA_TrackNum) && !strncasecmp(psz_comment, "TRACKNUMBER=", strlen("TRACKNUMBER=" ) ) )
451 /* Yeah yeah, such a clever idea, let's put xx/xx inside TRACKNUMBER
452 * Oh, and let's not use TRACKTOTAL or TOTALTRACKS... */
453 short unsigned u_track, u_total;
454 int nb_values = sscanf( &psz_comment[strlen("TRACKNUMBER=")], "%hu/%hu", &u_track, &u_total );
455 if( nb_values >= 1 )
/* An unsigned short prints as at most 5 digits, plus the NUL. */
457 char str[6];
458 snprintf(str, 6, "%u", u_track);
459 vlc_meta_Set( p_meta, vlc_meta_TrackNumber, str );
460 hasMetaFlags |= XIPHMETA_TrackNum;
461 if( nb_values >= 2 )
463 snprintf(str, 6, "%u", u_total);
464 vlc_meta_Set( p_meta, vlc_meta_TrackTotal, str );
465 hasMetaFlags |= XIPHMETA_TrackTotal;
469 else IF_EXTRACT_ONCE("TRACKTOTAL=", TrackTotal )
470 else IF_EXTRACT_ONCE("TOTALTRACKS=", TrackTotal )
471 else IF_EXTRACT("DESCRIPTION=", Description )
472 else IF_EXTRACT("COMMENT=", Description )
473 else IF_EXTRACT("COMMENTS=", Description )
474 else IF_EXTRACT("RATING=", Rating )
475 else IF_EXTRACT("DATE=", Date )
476 else IF_EXTRACT_FMT("LANGUAGE=", Language, p_fmt, psz_language )
477 else IF_EXTRACT("ORGANIZATION=", Publisher )
478 else IF_EXTRACT("ENCODER=", EncodedBy )
/* Embedded picture: the value is base64-decoded and handed to
 * ParseFlacPicture(); the decoded buffer is freed right after. */
479 else if( !strncasecmp( psz_comment, "METADATA_BLOCK_PICTURE=", strlen("METADATA_BLOCK_PICTURE=")))
481 if( attachments == NULL )
482 goto next_comment;
484 uint8_t *p_picture;
485 size_t i_size = vlc_b64_decode_binary( &p_picture, &psz_comment[strlen("METADATA_BLOCK_PICTURE=")]);
486 input_attachment_t *p_attachment = ParseFlacPicture( p_picture,
487 i_size, *i_attachments, i_cover_score, i_cover_idx );
488 free( p_picture );
489 if( p_attachment )
491 TAB_APPEND_CAST( (input_attachment_t**),
492 *i_attachments, *attachments, p_attachment );
/* Note: the REPLAYGAIN_ prefix test is case-sensitive (strncmp), unlike the
 * per-key tests inside the branch. */
495 else if ( ppf_replay_gain && ppf_replay_peak && !strncmp(psz_comment, "REPLAYGAIN_", 11) )
497 char *p = strchr( psz_comment, '=' );
498 if (!p) goto next_comment;
499 if ( !strncasecmp(psz_comment, "REPLAYGAIN_TRACK_GAIN=", 22) )
501 (*ppf_replay_gain)[AUDIO_REPLAY_GAIN_TRACK] = us_atof( ++p );
503 else if ( !strncasecmp(psz_comment, "REPLAYGAIN_ALBUM_GAIN=", 22) )
505 (*ppf_replay_gain)[AUDIO_REPLAY_GAIN_ALBUM] = us_atof( ++p );
507 else if ( !strncasecmp(psz_comment, "REPLAYGAIN_ALBUM_PEAK=", 22) )
509 (*ppf_replay_peak)[AUDIO_REPLAY_GAIN_ALBUM] = us_atof( ++p );
511 else if ( !strncasecmp(psz_comment, "REPLAYGAIN_TRACK_PEAK=", 22) )
513 (*ppf_replay_peak)[AUDIO_REPLAY_GAIN_TRACK] = us_atof( ++p );
/* CHAPTER<n>NAME=<name> and CHAPTER<n>=<h:m:s.ms> comments. */
516 else if( !strncasecmp(psz_comment, "CHAPTER", 7) )
518 unsigned int i_chapt;
519 seekpoint_t *p_seekpoint = NULL;
/* Upper-case the tag part (up to '=') in place so that the case-sensitive
 * strstr()/sscanf() patterns below match. */
521 for( int i = 0; psz_comment[i] && psz_comment[i] != '='; i++ )
522 if( psz_comment[i] >= 'a' && psz_comment[i] <= 'z' )
523 psz_comment[i] -= 'a' - 'A';
525 if( strstr( psz_comment, "NAME=" ) &&
526 sscanf( psz_comment, "CHAPTER%uNAME=", &i_chapt ) == 1 )
528 char *p = strchr( psz_comment, '=' );
529 p_seekpoint = getChapterEntry( i_chapt, &chapters_array );
530 if ( !p || ! p_seekpoint ) goto next_comment;
/* The first name seen for a chapter wins. */
531 if ( ! p_seekpoint->psz_name )
532 p_seekpoint->psz_name = strdup( ++p );
534 else if( sscanf( psz_comment, "CHAPTER%u=", &i_chapt ) == 1 )
536 unsigned int h, m, s, ms;
537 char *p = strchr( psz_comment, '=' );
538 if( p && sscanf( ++p, "%u:%u:%u.%u", &h, &m, &s, &ms ) == 4 )
540 p_seekpoint = getChapterEntry( i_chapt, &chapters_array );
541 if ( ! p_seekpoint ) goto next_comment;
542 p_seekpoint->i_time_offset = vlc_tick_from_sec(h * 3600 + m * 60 + s) + VLC_TICK_FROM_MS(ms);
/* Embedded cue sheet: everything after "cuesheet=" is parsed line by line. */
546 else if( !strncasecmp(psz_comment, "cuesheet=", 9) )
548 xiph_ParseCueSheet( &hasMetaFlags, p_meta, &psz_comment[9], comment_size - 9,
549 i_seekpoint, ppp_seekpoint );
551 else if( strchr( psz_comment, '=' ) )
553 /* generic (PERFORMER/LICENSE/ORGANIZATION/LOCATION/CONTACT/ISRC,
554 * undocumented tags and replay gain ) */
555 char *p = strchr( psz_comment, '=' );
/* Split at '=': psz_comment becomes the key, p the value. */
556 *p++ = '\0';
/* Keys are stored upper-cased. */
558 for( int i = 0; psz_comment[i]; i++ )
559 if( psz_comment[i] >= 'a' && psz_comment[i] <= 'z' )
560 psz_comment[i] -= 'a' - 'A';
562 vlc_meta_AddExtra( p_meta, psz_comment, p );
564 #undef IF_EXTRACT
/* Reached on every path that allocated (or failed to allocate) the copy:
 * release it and step over the comment payload. */
565 next_comment:
566 free( psz_comment );
567 RM( comment_size );
569 #undef RM
/* Hand the collected chapters to the caller's seekpoint array, skipping
 * slots that were never filled, then drop the temporary table. */
571 for ( unsigned int i=0; i<chapters_array.i_size; i++ )
573 if ( !chapters_array.pp_chapters[i] ) continue;
574 TAB_APPEND_CAST( (seekpoint_t**), *i_seekpoint, *ppp_seekpoint,
575 chapters_array.pp_chapters[i] );
577 free( chapters_array.pp_chapters );
580 const char *FindKateCategoryName( const char *psz_tag )
582 for( size_t i = 0; i < sizeof(Katei18nCategories)/sizeof(Katei18nCategories[0]); i++ )
584 if( !strcmp( psz_tag, Katei18nCategories[i].psz_tag ) )
585 return Katei18nCategories[i].psz_i18n;
587 return N_("Unknown category");