stream: replace input_thread_t by input_item_t
[vlc.git] / modules / demux / playlist / asx.c
blobdb5bb44972c13bae3089a59a254df7eda706fed5
1 /*****************************************************************************
2 * asx.c : ASX playlist format import
3 *****************************************************************************
4 * Copyright (C) 2005-2013 VLC authors and VideoLAN
5 * $Id$
7 * Authors: Derk-Jan Hartman <hartman at videolan dot org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
/* See also:
 * http://msdn.microsoft.com/en-us/library/windows/desktop/dd564668.aspx
 */
28 /*****************************************************************************
29 * Preamble
30 *****************************************************************************/
31 #ifdef HAVE_CONFIG_H
32 # include "config.h"
33 #endif
35 #include <vlc_common.h>
36 #include <vlc_access.h>
37 #include <vlc_xml.h>
38 #include <vlc_strings.h>
39 #include <vlc_charset.h>
40 #include <vlc_memstream.h>
42 #include <assert.h>
43 #include <ctype.h>
44 #include <string.h>
46 #include "playlist.h"
48 /*****************************************************************************
49 * Local prototypes
50 *****************************************************************************/
51 static int ReadDir( stream_t *, input_item_node_t * );
53 static bool ParseTime(xml_reader_t *p_xml_reader, vlc_tick_t* pi_result )
55 assert( pi_result );
56 char *psz_value = NULL;
57 char *psz_start = NULL;
59 const char *psz_node = NULL;
60 const char *psz_txt = NULL;
62 int i_subfractions = -1;
64 int i_subresult = 0;
65 vlc_tick_t i_result = 0;
69 psz_txt = xml_ReaderNextAttr( p_xml_reader, &psz_node );
71 while( psz_txt && strncasecmp( psz_txt, "VALUE", 5 ) );
73 if( !psz_txt )
74 return false;
76 psz_value = strdup( psz_node );
77 psz_start = psz_value;
79 while( *psz_value )
81 if( isdigit( *psz_value ) )
83 i_subresult = i_subresult * 10;
84 i_subresult += *psz_value - '0';
85 if( i_subfractions != -1 )
86 i_subfractions++;
88 else if( *psz_value == ':' )
90 i_result += i_subresult;
91 i_result = i_result * 60;
92 i_subresult = 0;
94 else if( *psz_value == '.' )
96 i_subfractions = 0;
97 i_result += i_subresult;
98 i_subresult = 0;
100 psz_value++;
103 if( i_subfractions == -1)
104 i_result += i_subresult;
106 /* Convert to microseconds */
107 if( i_subfractions == -1)
108 i_subfractions = 0;
109 while( i_subfractions < 6 )
111 i_subresult = i_subresult * 10;
112 i_subfractions++;
114 i_result = i_result * CLOCK_FREQ;
115 if( i_subfractions != -1)
116 i_result += VLC_TICK_FROM_US( i_subresult );
118 free( psz_start );
119 *pi_result = i_result;
120 return true;
123 static bool ReadElement( xml_reader_t *p_xml_reader, char **ppsz_txt )
125 const char *psz_node = NULL;
127 /* Read the text node */
128 int ret = xml_ReaderNextNode( p_xml_reader, &psz_node );
129 if( ret <= 0 )
130 return false;
131 free( *ppsz_txt );
132 *ppsz_txt = strdup( psz_node );
133 vlc_xml_decode( *ppsz_txt );
135 /* Read the end element */
136 xml_ReaderNextNode( p_xml_reader, &psz_node );
137 /* TODO :
138 * Currently we don't check the agreement of start and end element
139 * This function is only used to read the element that cannot have child
140 * according to the reference.
142 return true;
145 static bool PeekASX( stream_t *s )
147 const uint8_t *p_peek;
148 return ( vlc_stream_Peek( s->s, &p_peek, 12 ) == 12
149 && !strncasecmp( (const char*) p_peek, "<asx version", 12 ) );
152 /*****************************************************************************
153 * Import_ASX: main import function
154 *****************************************************************************/
156 int Import_ASX( vlc_object_t *p_this )
158 stream_t *p_demux = (stream_t *)p_this;
160 CHECK_FILE(p_demux);
162 char *type = stream_MimeType( p_demux->s );
164 if( stream_HasExtension( p_demux, ".asx" )
165 || stream_HasExtension( p_demux, ".wax" )
166 || stream_HasExtension( p_demux, ".wvx" )
167 || (type != NULL && (strcasecmp(type, "video/x-ms-asf") == 0
168 || strcasecmp(type, "audio/x-ms-wax") == 0)
169 && PeekASX( p_demux ) ) )
171 msg_Dbg( p_demux, "found valid ASX playlist" );
172 free(type);
174 else
176 free(type);
177 return VLC_EGENERIC;
180 p_demux->pf_control = access_vaDirectoryControlHelper;
181 p_demux->pf_readdir = ReadDir;
182 return VLC_SUCCESS;
185 static void ProcessEntry( int *pi_n_entry, xml_reader_t *p_xml_reader,
186 input_item_node_t *p_subitems,
187 input_item_t *p_current_input, char *psz_prefix )
189 const char *psz_node = NULL;
190 const char *psz_txt = NULL;
191 int i_type;
193 char *psz_title = NULL;
194 char *psz_artist = NULL;
195 char *psz_copyright = NULL;
196 char *psz_moreinfo = NULL;
197 char *psz_description = NULL;
198 char *psz_name = NULL;
199 char *psz_mrl = NULL;
200 char *psz_href = NULL;
202 input_item_t *p_entry = NULL;
204 int i_options;
205 vlc_tick_t i_start = 0;
206 vlc_tick_t i_duration = INPUT_DURATION_ZERO;
207 char *ppsz_options[2];
211 i_type = xml_ReaderNextNode( p_xml_reader, &psz_node );
213 if( i_type == XML_READER_ERROR || i_type == XML_READER_NONE )
214 break;
216 if( i_type == XML_READER_STARTELEM )
218 /* Metadata Node */
219 if( !strncasecmp( psz_node, "TITLE", 5 ) )
221 if( !ReadElement( p_xml_reader, &psz_title ) )
222 break;
224 else if( !strncasecmp( psz_node, "AUTHOR", 6 ) )
226 if( !ReadElement( p_xml_reader, &psz_artist ) )
227 break;
229 else if( !strncasecmp( psz_node, "COPYRIGHT", 9 ) )
231 if( !ReadElement( p_xml_reader, &psz_copyright ) )
232 break;
234 else if( !strncasecmp( psz_node,"MOREINFO", 8 ) )
238 psz_txt = xml_ReaderNextAttr( p_xml_reader, &psz_node );
240 while(psz_txt && strncasecmp( psz_txt, "HREF", 4 ) );
242 if( !psz_txt )
244 if( !ReadElement( p_xml_reader, &psz_moreinfo ) )
245 break;
247 else
248 psz_moreinfo = strdup( psz_node );
249 vlc_xml_decode( psz_moreinfo );
251 else if( !strncasecmp( psz_node, "ABSTRACT", 8 ) )
253 if( !ReadElement( p_xml_reader, &psz_description ) )
254 break;
256 else if( !strncasecmp( psz_node, "DURATION", 8 ) )
258 if( !ParseTime( p_xml_reader, &i_duration ) )
259 break;
261 else if( !strncasecmp( psz_node, "STARTTIME", 9 ) )
263 if( !ParseTime( p_xml_reader, &i_start ) )
264 break;
266 /* Reference Node */
267 /* All ref node will be converted into an entry */
268 else if( !strncasecmp( psz_node, "REF", 3 ) )
270 *pi_n_entry = *pi_n_entry + 1;
272 if( !psz_title )
273 psz_title = input_item_GetTitle( p_current_input );
274 if( !psz_artist )
275 psz_artist = input_item_GetArtist( p_current_input );
276 if( !psz_copyright )
277 psz_copyright = input_item_GetCopyright( p_current_input );
278 if( !psz_description )
279 psz_description = input_item_GetDescription( p_current_input );
283 psz_txt = xml_ReaderNextAttr( p_xml_reader, &psz_node );
285 while( psz_txt != NULL && strncasecmp( psz_txt, "HREF", 4) );
286 if( psz_txt == NULL )
287 break;
288 psz_href = strdup( psz_node );
290 if( asprintf( &psz_name, "%d. %s", *pi_n_entry, psz_title ) == -1)
291 psz_name = strdup( psz_title );
292 vlc_xml_decode( psz_href );
293 psz_mrl = ProcessMRL( psz_href, psz_prefix );
295 /* Add Time information */
296 i_options = 0;
297 if( i_start )
299 if( asprintf( ppsz_options, ":start-time=%"PRId64 ,
300 i_start / CLOCK_FREQ ) != -1)
301 i_options++;
303 if( i_duration)
305 if( asprintf( ppsz_options + i_options,
306 ":stop-time=%"PRId64,
307 (i_start + i_duration) / CLOCK_FREQ ) != -1)
308 i_options++;
311 /* Create the input item */
312 p_entry = input_item_NewExt( psz_mrl, psz_name, i_duration,
313 ITEM_TYPE_UNKNOWN, ITEM_NET_UNKNOWN );
314 if( p_entry == NULL )
315 goto end;
317 input_item_AddOptions( p_entry, i_options,
318 (const char **)ppsz_options,
319 VLC_INPUT_OPTION_TRUSTED );
320 input_item_CopyOptions( p_entry, p_current_input );
322 /* Add the metadata */
323 if( psz_name )
324 input_item_SetTitle( p_entry, psz_name );
325 if( psz_artist )
326 input_item_SetArtist( p_entry, psz_artist );
327 if( psz_copyright )
328 input_item_SetCopyright( p_entry, psz_copyright );
329 if( psz_moreinfo )
330 input_item_SetURL( p_entry, psz_moreinfo );
331 if( psz_description )
332 input_item_SetDescription( p_entry, psz_description );
333 if( i_duration > 0 )
334 p_entry->i_duration = i_duration;
336 input_item_node_AppendItem( p_subitems, p_entry );
338 input_item_Release( p_entry );
340 end:
341 while( i_options )
342 free( ppsz_options[--i_options] );
343 free( psz_name );
344 free( psz_mrl );
348 while( i_type != XML_READER_ENDELEM || strncasecmp( psz_node, "ENTRY", 5 ) );
350 free( psz_href );
351 free( psz_title );
352 free( psz_artist );
353 free( psz_copyright );
354 free( psz_moreinfo );
355 free( psz_description );
/**
 * Tells whether a string already looks XML-escaped.
 *
 * The string is considered escaped when it contains none of the characters
 * < > ' " and every '&' starts a reference of one of these forms:
 *   &name;   (one or more alphanumeric characters)
 *   &#DEC;   (one or more decimal digits)
 *   &#xHEX;  (one or more hexadecimal digits, lower-case 'x' only)
 *
 * @param psz_str NUL-terminated string to inspect, must not be NULL
 * @return true only if at least one reference was found and all of them are
 *         well formed; false otherwise (including for a string without '&')
 */
static bool isXmlEncoded(const char* psz_str)
{
    assert( psz_str != NULL );

    /* A raw markup character means the text still needs escaping */
    if( strpbrk( psz_str, "<>'\"" ) != NULL )
        return false;

    bool is_escaped = false;
    const char *psz_amp;

    while( ( psz_amp = strchr( psz_str, '&' ) ) != NULL )
    {
        const char *psz_end = strchr( psz_amp, ';' );
        if( psz_end == NULL )
            return false;

        /* Find out which kind of reference this is and where its payload
         * starts. psz_end is the first ';' after the '&', so the characters
         * tested here are always inside the string. */
        enum { REF_NAME, REF_DEC, REF_HEX } e_kind = REF_NAME;
        const char *psz_ptr = psz_amp + 1;
        if( *psz_ptr == '#' )
        {
            e_kind = REF_DEC;
            psz_ptr++;
            if( *psz_ptr == 'x' )
            {
                e_kind = REF_HEX;
                psz_ptr++;
            }
        }

        if( psz_ptr == psz_end )
            return false; /* empty payload: "&;", "&#;" or "&#x;" */

        for( ; psz_ptr < psz_end; psz_ptr++ )
        {
            /* <ctype.h> functions are only defined for values representable
             * as unsigned char; plain char may be negative (UTF-8 bytes). */
            const unsigned char c = (unsigned char)*psz_ptr;
            bool b_valid;

            if( e_kind == REF_HEX )
                b_valid = isxdigit( c ) != 0;
            else if( e_kind == REF_DEC )
                b_valid = isdigit( c ) != 0;
            else
                b_valid = isalnum( c ) != 0;

            if( !b_valid )
                return false;
        }

        is_escaped = true;
        psz_str = psz_end;
    }
    return is_escaped;
}
/**
 * Appends a piece of text to a memory stream, XML-escaping it first unless
 * isXmlEncoded() reports that it is already escaped.
 *
 * @param p_stream  destination memory stream
 * @param psz_begin first character of the text
 * @param psz_end   one past the last character, or NULL to take everything
 *                  up to the terminating NUL of psz_begin
 *
 * Nothing is written when a temporary copy or the escaped string cannot be
 * allocated.
 */
static void memstream_puts_xmlencoded(struct vlc_memstream* p_stream, const char* psz_begin, const char* psz_end)
{
    /* Work on a NUL-terminated private copy of the requested range */
    char *psz_chunk = ( psz_end != NULL )
                    ? strndup( psz_begin, psz_end - psz_begin )
                    : strdup( psz_begin );
    if( psz_chunk == NULL )
        return;

    if( isXmlEncoded( psz_chunk ) )
    {
        vlc_memstream_puts( p_stream, psz_chunk );
    }
    else
    {
        char *psz_escaped = vlc_xml_encode( psz_chunk );
        if( psz_escaped != NULL )
        {
            vlc_memstream_puts( p_stream, psz_escaped );
            free( psz_escaped );
        }
    }

    free( psz_chunk );
}
440 * ASX doesn't requires to be a strict XML document, this function will
441 * - make tags and attributes upercase
442 * - escape strings when required
444 static char* ASXToXML( char* psz_source )
446 bool b_in_string= false;
447 char *psz_source_cur = psz_source;
448 char *psz_source_old = psz_source;
449 char c_string_delim;
451 struct vlc_memstream stream_out;
452 if( vlc_memstream_open( &stream_out ) != 0 )
453 return NULL;
455 while ( psz_source_cur != NULL && *psz_source_cur != '\0' )
457 psz_source_old = psz_source_cur;
458 //search tag start
459 if( ( psz_source_cur = strchr( psz_source_cur, '<' ) ) == NULL )
461 memstream_puts_xmlencoded(&stream_out, psz_source_old, NULL);
462 //vlc_memstream_puts( &stream_out, psz_source_old );
463 break;
466 memstream_puts_xmlencoded(&stream_out, psz_source_old, psz_source_cur);
467 psz_source_old = psz_source_cur;
469 //skip if comment, no need to copy them to the ouput.
470 if( strncmp( psz_source_cur, "<!--", 4 ) == 0 )
472 psz_source_cur += 4;
473 psz_source_cur = strstr( psz_source_cur, "-->" );
474 if( psz_source_cur == NULL)
475 break;
476 else
478 psz_source_cur += 3;
479 continue;
482 else
484 vlc_memstream_putc( &stream_out, '<' );
485 psz_source_cur++;
488 for ( ; *psz_source_cur != '\0'; psz_source_cur++ )
490 if( b_in_string == false )
492 if( *psz_source_cur == '>')
494 vlc_memstream_putc( &stream_out, '>' );
495 psz_source_cur++;
496 break;
498 if( *psz_source_cur == '"' || *psz_source_cur == '\'' )
500 c_string_delim = *psz_source_cur;
501 b_in_string = true;
502 vlc_memstream_putc( &stream_out, c_string_delim );
504 else
506 //convert tag and attributes to upper case
507 vlc_memstream_putc( &stream_out, vlc_ascii_toupper( *psz_source_cur ) );
510 else
512 psz_source_old = psz_source_cur;
513 psz_source_cur = strchr( psz_source_cur, c_string_delim );
514 if( psz_source_cur == NULL )
515 break;
517 memstream_puts_xmlencoded(&stream_out, psz_source_old, psz_source_cur);
518 vlc_memstream_putc( &stream_out, c_string_delim );
519 b_in_string = false;
523 if( vlc_memstream_close( &stream_out ) != 0 )
524 return NULL;
526 return stream_out.ptr;
/**
 * Extracts the value of the "encoding" pseudo-attribute from an XML
 * declaration (<?xml ... ?>) located at the start of the text, after
 * optional white space.
 *
 * @param psz_xml NUL-terminated document text
 * @return a heap-allocated copy of the encoding name (to be released with
 *         free()), or NULL when there is no XML declaration, when it is
 *         malformed, when it has no non-empty encoding value, or when the
 *         copy cannot be allocated
 */
static char *detectXmlEncoding( const char *psz_xml )
{
    static const char psz_blank[] = " \n\r\t";
    static const char psz_key[] = "encoding";

    psz_xml += strspn( psz_xml, psz_blank );
    if( strncasecmp( psz_xml, "<?xml", 5 ) != 0 )
        return NULL;
    psz_xml += 5;

    const char *psz_end = strstr( psz_xml, "?>" );
    if( psz_end == NULL )
        return NULL;

    while( psz_xml < psz_end )
    {
        /* keyword */
        psz_xml += strspn( psz_xml, psz_blank );
        if( psz_xml >= psz_end )
            break; /* only white space was left before "?>" */

        const char *psz_keyword_begin = psz_xml;
        psz_xml += strcspn( psz_xml, " \n\r\t=" );
        if( psz_xml >= psz_end )
            return NULL; /* keyword without a value inside the declaration */
        const size_t i_keyword = (size_t)( psz_xml - psz_keyword_begin );

        /* '=' optionally surrounded by white space */
        psz_xml += strspn( psz_xml, psz_blank );
        if( *psz_xml != '=' )
            return NULL;
        psz_xml++;
        psz_xml += strspn( psz_xml, psz_blank );

        /* quoted value */
        const char quote = *psz_xml;
        if( quote != '"' && quote != '\'' )
            return NULL;

        const char *psz_value_begin = ++psz_xml;
        const char *psz_value_end = strchr( psz_xml, quote );
        if( psz_value_end == NULL )
            return NULL;
        psz_xml = psz_value_end + 1;
        const size_t i_value = (size_t)( psz_value_end - psz_value_begin );

        /* The keyword must be exactly "encoding": comparing only over the
         * keyword's own length would also accept any prefix such as "enc". */
        if( i_keyword == sizeof( psz_key ) - 1
         && strncasecmp( psz_keyword_begin, psz_key, i_keyword ) == 0
         && i_value > 0 )
            return strndup( psz_value_begin, i_value );
    }

    return NULL;
}
582 static stream_t* PreparseStream( stream_t *p_demux )
584 stream_t *s = p_demux->s;
585 uint64_t streamSize;
586 static const size_t maxsize = 1024 * 1024;
588 if( vlc_stream_GetSize( s, &streamSize ) != VLC_SUCCESS)
589 streamSize = maxsize;
591 // Don't attempt to convert/store huge streams
592 if( streamSize > maxsize )
593 return NULL;
594 char* psz_source = malloc( streamSize + 1 * sizeof( *psz_source ) );
595 if ( unlikely( psz_source == NULL ) )
596 return NULL;
597 size_t i_read = 0;
600 ssize_t i_ret = vlc_stream_Read( s, psz_source + i_read,
601 streamSize > 1024 ? 1024 : streamSize );
602 if ( i_ret <= 0 )
603 break;
604 assert( (size_t)i_ret <= streamSize );
605 streamSize -= i_ret;
606 i_read += i_ret;
607 } while ( streamSize > 0 );
608 psz_source[i_read] = 0;
611 char *encoding = detectXmlEncoding( psz_source );
612 if( encoding != NULL )
614 if( strcasecmp( encoding, "UTF-8" ) == 0 )
615 free( encoding );
616 else
618 //strip xml prologue to avoid double conversion
619 char *tmp = strstr( psz_source, "?>" ) + 2;
620 tmp = FromCharset( encoding, tmp, strlen( tmp ) );
621 free( psz_source );
622 free( encoding );
623 if ( !tmp )
624 return NULL;
625 psz_source = tmp;
628 else if( !IsUTF8( psz_source ) )
630 char *tmp = FromLocaleDup( psz_source );
631 free( psz_source );
632 if( !tmp )
633 return NULL;
634 psz_source = tmp;
637 char *psz_source_xml = ASXToXML( psz_source );
638 free( psz_source );
639 if( psz_source_xml == NULL )
640 return NULL;
642 stream_t * p_stream = vlc_stream_MemoryNew( p_demux, (uint8_t*)psz_source_xml, strlen(psz_source_xml), false );
643 return p_stream;
646 static int ReadDir( stream_t *p_demux, input_item_node_t *p_subitems )
648 if (unlikely(p_demux->psz_url == NULL))
649 return VLC_EGENERIC;
651 const char *psz_node = NULL;
652 char *psz_txt = NULL;
653 char *psz_base = strdup( p_demux->psz_url );
654 if (unlikely(psz_base == NULL))
655 return VLC_ENOMEM;
657 char *psz_title_asx = NULL;
658 char *psz_entryref = NULL;
660 xml_reader_t *p_xml_reader = NULL;
661 input_item_t *p_current_input = GetCurrentItem( p_demux );
662 stream_t* p_stream = PreparseStream( p_demux );
664 bool b_first_node = false;
665 int i_type;
666 int i_n_entry = 0;
668 p_xml_reader = xml_ReaderCreate( p_demux, p_stream ? p_stream
669 : p_demux->s );
670 if( !p_xml_reader )
672 msg_Err( p_demux, "Cannot parse ASX input file as XML");
673 goto error;
678 i_type = xml_ReaderNextNode( p_xml_reader, &psz_node );
679 if( i_type == XML_READER_ERROR )
680 break;
682 if( i_type == XML_READER_STARTELEM )
684 if( !b_first_node )
686 if(!strncasecmp( psz_node, "ASX", 3 ) )
687 b_first_node = true;
688 else
690 msg_Err( p_demux, "invalid root node" );
691 goto error;
695 /* Metadata Node Handler */
696 if( !strncasecmp( psz_node, "TITLE", 5 ) )
698 if( ! ReadElement( p_xml_reader, &psz_title_asx ) )
699 break;
700 input_item_SetTitle( p_current_input, psz_title_asx );
702 else if( !strncasecmp( psz_node, "AUTHOR", 6 ) )
704 if( ! ReadElement( p_xml_reader, &psz_txt ) )
705 break;
706 input_item_SetArtist( p_current_input, psz_txt );
708 else if( !strncasecmp( psz_node, "COPYRIGHT", 9 ) )
710 if( ! ReadElement( p_xml_reader, &psz_txt ) )
711 break;
712 input_item_SetCopyright( p_current_input, psz_txt );
714 else if( !strncasecmp( psz_node, "MOREINFO", 8 ) )
716 const char *psz_tmp;
719 psz_tmp = xml_ReaderNextAttr( p_xml_reader, &psz_node );
721 while( psz_tmp && strncasecmp( psz_tmp, "HREF", 4 ) );
723 if( !psz_tmp ) // If HREF attribute doesn't exist
725 if( ! ReadElement( p_xml_reader, &psz_txt ) )
726 break;
728 else
729 psz_txt = strdup( psz_node );
731 vlc_xml_decode( psz_txt );
732 input_item_SetURL( p_current_input, psz_txt );
734 else if( !strncasecmp( psz_node, "ABSTRACT", 8 ) )
736 if( ! ReadElement( p_xml_reader, &psz_txt ) )
737 break;
738 input_item_SetDescription( p_current_input, psz_txt );
740 else
741 /* Base Node handler */
742 if( !strncasecmp( psz_node, "BASE", 4 ) )
744 if( ! ReadElement( p_xml_reader, &psz_base ) )
745 break;
747 else
748 /* Entry Ref Handler */
749 if( !strncasecmp( psz_node, "ENTRYREF", 7 ) )
751 const char *psz_tmp;
754 psz_tmp = xml_ReaderNextAttr( p_xml_reader, &psz_node );
756 while( psz_tmp && !strncasecmp( psz_tmp, "HREF", 4 ) );
757 if( ! psz_tmp )
758 break;
760 /* Create new input item */
761 input_item_t *p_input;
762 psz_txt = strdup( psz_node );
763 vlc_xml_decode( psz_txt );
764 p_input = input_item_New( psz_txt, psz_title_asx );
765 input_item_CopyOptions( p_input, p_current_input );
766 input_item_node_AppendItem( p_subitems, p_input );
768 input_item_Release( p_input );
770 else
771 /* Entry Handler */
772 if( !strncasecmp( psz_node, "ENTRY", 5 ) )
774 ProcessEntry( &i_n_entry, p_xml_reader, p_subitems,
775 p_current_input, psz_base);
777 /* FIXME Unsupported elements
778 PARAM
779 EVENT
780 REPEAT
781 ENDMARK
782 STARTMARK
786 while( i_type != XML_READER_ENDELEM || strncasecmp( psz_node, "ASX", 3 ) );
788 error:
789 free( psz_base );
790 free( psz_title_asx );
791 free( psz_entryref );
792 free( psz_txt );
794 if( p_xml_reader)
795 xml_ReaderDelete( p_xml_reader );
796 if( p_stream )
797 vlc_stream_Delete( p_stream );
799 return 0;