1 /*****************************************************************************
2 * asx.c : ASX playlist format import
3 *****************************************************************************
4 * Copyright (C) 2005-2013 VLC authors and VideoLAN
7 * Authors: Derk-Jan Hartman <hartman at videolan dot org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
25 * http://msdn.microsoft.com/en-us/library/windows/desktop/dd564668.aspx
28 /*****************************************************************************
30 *****************************************************************************/
35 #include <vlc_common.h>
36 #include <vlc_access.h>
38 #include <vlc_strings.h>
39 #include <vlc_charset.h>
40 #include <vlc_memstream.h>
48 /*****************************************************************************
50 *****************************************************************************/
/* Forward declaration. ReadDir is defined further down in this file and is
 * installed as the pf_readdir callback by Import_ASX. */
51 static int ReadDir( stream_t
*, input_item_node_t
* );
/*
 * ParseTime: parse the time attribute of the current XML element and store
 * the result in *pi_result.
 *
 * What the visible fragments show:
 *  - attributes are pulled with xml_ReaderNextAttr until the returned name
 *    starts with "VALUE" (case-insensitive, first 5 characters only);
 *  - the string written through the second argument is duplicated with
 *    strdup and scanned character by character;
 *  - digits accumulate into i_subresult in base 10, a colon folds
 *    i_subresult into i_result and multiplies by 60, a dot folds
 *    i_subresult into i_result;
 *  - i_subfractions starts at -1 and is compared against -1 and 6 to decide
 *    whether and how a fractional part is scaled;
 *  - the result is multiplied by CLOCK_FREQ and the fractional part is added
 *    through VLC_TICK_FROM_US.
 *
 * NOTE(review): this listing is missing lines (braces, returns, the
 * declaration of i_subresult, the updates of i_subfractions), so the exact
 * return contract cannot be confirmed from here.
 */
53 static bool ParseTime(xml_reader_t
*p_xml_reader
, vlc_tick_t
* pi_result
)
56 char *psz_value
= NULL
;
57 char *psz_start
= NULL
;
59 const char *psz_node
= NULL
;
60 const char *psz_txt
= NULL
;
62 int i_subfractions
= -1;
65 vlc_tick_t i_result
= 0;
69 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
71 while( psz_txt
&& strncasecmp( psz_txt
, "VALUE", 5 ) );
/* NOTE(review): no NULL check on the strdup result is visible before the
 * value is dereferenced below; confirm one exists on the missing lines. */
76 psz_value
= strdup( psz_node
);
/* psz_start keeps the original pointer; psz_value is the scanning cursor
 * (presumably psz_start is what gets freed; the free is not visible). */
77 psz_start
= psz_value
;
/* NOTE(review): *psz_value is a plain char; passing a negative value to
 * isdigit is undefined behaviour. A cast to unsigned char would be safer. */
81 if( isdigit( *psz_value
) )
83 i_subresult
= i_subresult
* 10;
84 i_subresult
+= *psz_value
- '0';
85 if( i_subfractions
!= -1 )
88 else if( *psz_value
== ':' )
90 i_result
+= i_subresult
;
91 i_result
= i_result
* 60;
94 else if( *psz_value
== '.' )
97 i_result
+= i_subresult
;
103 if( i_subfractions
== -1)
104 i_result
+= i_subresult
;
106 /* Convert to microseconds */
107 if( i_subfractions
== -1)
/* Pad the fractional digits up to 6 places so i_subresult is in
 * microseconds before it goes through VLC_TICK_FROM_US. */
109 while( i_subfractions
< 6 )
111 i_subresult
= i_subresult
* 10;
114 i_result
= i_result
* CLOCK_FREQ
;
115 if( i_subfractions
!= -1)
116 i_result
+= VLC_TICK_FROM_US( i_subresult
);
119 *pi_result
= i_result
;
/*
 * ReadElement: read the text content of the current element into
 * *ppsz_txt.
 *
 * Visible steps: one node is read with xml_ReaderNextNode, its string is
 * duplicated with strdup into *ppsz_txt and decoded in place with
 * vlc_xml_decode, then one more node is read and its type and name are
 * not inspected in the visible fragments.
 *
 * NOTE(review): the checks on ret and on the strdup result are not visible
 * in this listing; as shown, the strdup result goes straight into
 * vlc_xml_decode. Confirm the missing lines guard against NULL.
 */
123 static bool ReadElement( xml_reader_t
*p_xml_reader
, char **ppsz_txt
)
125 const char *psz_node
= NULL
;
127 /* Read the text node */
128 int ret
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
132 *ppsz_txt
= strdup( psz_node
);
133 vlc_xml_decode( *ppsz_txt
);
135 /* Read the end element */
136 xml_ReaderNextNode( p_xml_reader
, &psz_node
);
138 * Currently we don't check the agreement of start and end element
139 * This function is only used to read the element that cannot have child
140 * according to the reference.
/*
 * PeekASX: return true when 12 bytes can be peeked from the underlying
 * stream s->s and they equal "<asx version", compared case-insensitively.
 */
145 static bool PeekASX( stream_t
*s
)
147 const uint8_t *p_peek
;
148 return ( vlc_stream_Peek( s
->s
, &p_peek
, 12 ) == 12
149 && !strncasecmp( (const char*) p_peek
, "<asx version", 12 ) );
152 /*****************************************************************************
153 * Import_ASX: main import function
154 *****************************************************************************/
/*
 * Import_ASX: probe the stream and, when it looks like an ASX playlist,
 * install the directory callbacks.
 *
 * The stream is accepted when its name has the extension .asx, .wax or
 * .wvx, or when its MIME type is video/x-ms-asf or audio/x-ms-wax and
 * PeekASX also succeeds. On acceptance a debug message is logged and
 * pf_control and pf_readdir are set.
 *
 * NOTE(review): the release of the string returned by stream_MimeType and
 * the return statements are not visible in this listing; confirm both
 * paths free type.
 */
156 int Import_ASX( vlc_object_t
*p_this
)
158 stream_t
*p_demux
= (stream_t
*)p_this
;
162 char *type
= stream_MimeType( p_demux
->s
);
164 if( stream_HasExtension( p_demux
, ".asx" )
165 || stream_HasExtension( p_demux
, ".wax" )
166 || stream_HasExtension( p_demux
, ".wvx" )
167 || (type
!= NULL
&& (strcasecmp(type
, "video/x-ms-asf") == 0
168 || strcasecmp(type
, "audio/x-ms-wax") == 0)
169 && PeekASX( p_demux
) ) )
171 msg_Dbg( p_demux
, "found valid ASX playlist" );
180 p_demux
->pf_control
= access_vaDirectoryControlHelper
;
181 p_demux
->pf_readdir
= ReadDir
;
/*
 * ProcessEntry: consume one ENTRY element and append one input item per
 * REF child to p_subitems.
 *
 * Visible behaviour:
 *  - nodes are read until an end element whose name starts with "ENTRY";
 *  - TITLE, AUTHOR, COPYRIGHT and ABSTRACT are read with ReadElement into
 *    local strings; MOREINFO scans the attributes for HREF and either
 *    reads the element text or duplicates the attribute value;
 *  - DURATION and STARTTIME are parsed with ParseTime;
 *  - each REF increments *pi_n_entry, may fall back to metadata taken from
 *    p_current_input, looks up its HREF attribute, resolves it through
 *    ProcessMRL with psz_prefix, and creates an item with
 *    input_item_NewExt;
 *  - start-time and stop-time options are formatted in whole seconds
 *    (ticks divided by CLOCK_FREQ) and added as trusted options;
 *  - the item inherits the options of p_current_input, receives the
 *    metadata, is appended to p_subitems and released.
 *
 * Element names are matched by prefix only (strncasecmp with the length of
 * the keyword), so for example "REF" also matches longer names starting
 * with those letters.
 *
 * NOTE(review): many lines (braces, do/while openers, break statements,
 * the declarations of i_type and i_options, some frees) are missing from
 * this listing, so the control flow above is partly inferred from the
 * visible conditions.
 */
185 static void ProcessEntry( int *pi_n_entry
, xml_reader_t
*p_xml_reader
,
186 input_item_node_t
*p_subitems
,
187 input_item_t
*p_current_input
, char *psz_prefix
)
189 const char *psz_node
= NULL
;
190 const char *psz_txt
= NULL
;
193 char *psz_title
= NULL
;
194 char *psz_artist
= NULL
;
195 char *psz_copyright
= NULL
;
196 char *psz_moreinfo
= NULL
;
197 char *psz_description
= NULL
;
198 char *psz_name
= NULL
;
199 char *psz_mrl
= NULL
;
200 char *psz_href
= NULL
;
202 input_item_t
*p_entry
= NULL
;
205 vlc_tick_t i_start
= 0;
206 vlc_tick_t i_duration
= INPUT_DURATION_ZERO
;
/* Room for at most two options: start-time and stop-time. */
207 char *ppsz_options
[2];
211 i_type
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
213 if( i_type
== XML_READER_ERROR
|| i_type
== XML_READER_NONE
)
216 if( i_type
== XML_READER_STARTELEM
)
219 if( !strncasecmp( psz_node
, "TITLE", 5 ) )
221 if( !ReadElement( p_xml_reader
, &psz_title
) )
224 else if( !strncasecmp( psz_node
, "AUTHOR", 6 ) )
226 if( !ReadElement( p_xml_reader
, &psz_artist
) )
229 else if( !strncasecmp( psz_node
, "COPYRIGHT", 9 ) )
231 if( !ReadElement( p_xml_reader
, &psz_copyright
) )
234 else if( !strncasecmp( psz_node
,"MOREINFO", 8 ) )
/* Scan attributes until one whose name starts with HREF, or until the
 * attributes are exhausted. */
238 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
240 while(psz_txt
&& strncasecmp( psz_txt
, "HREF", 4 ) );
244 if( !ReadElement( p_xml_reader
, &psz_moreinfo
) )
248 psz_moreinfo
= strdup( psz_node
);
249 vlc_xml_decode( psz_moreinfo
);
251 else if( !strncasecmp( psz_node
, "ABSTRACT", 8 ) )
253 if( !ReadElement( p_xml_reader
, &psz_description
) )
256 else if( !strncasecmp( psz_node
, "DURATION", 8 ) )
258 if( !ParseTime( p_xml_reader
, &i_duration
) )
261 else if( !strncasecmp( psz_node
, "STARTTIME", 9 ) )
263 if( !ParseTime( p_xml_reader
, &i_start
) )
267 /* All ref node will be converted into an entry */
268 else if( !strncasecmp( psz_node
, "REF", 3 ) )
270 *pi_n_entry
= *pi_n_entry
+ 1;
/* Fall back to the metadata of the parent item. Only the guard for the
 * description is visible here; the guards for the other three fields are
 * presumably on missing lines. */
273 psz_title
= input_item_GetTitle( p_current_input
);
275 psz_artist
= input_item_GetArtist( p_current_input
);
277 psz_copyright
= input_item_GetCopyright( p_current_input
);
278 if( !psz_description
)
279 psz_description
= input_item_GetDescription( p_current_input
);
283 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
285 while( psz_txt
!= NULL
&& strncasecmp( psz_txt
, "HREF", 4) );
286 if( psz_txt
== NULL
)
288 psz_href
= strdup( psz_node
);
/* Display name is the entry number followed by the title; when asprintf
 * fails the plain title is duplicated instead. */
290 if( asprintf( &psz_name
, "%d. %s", *pi_n_entry
, psz_title
) == -1)
291 psz_name
= strdup( psz_title
);
292 vlc_xml_decode( psz_href
);
293 psz_mrl
= ProcessMRL( psz_href
, psz_prefix
);
295 /* Add Time information */
299 if( asprintf( ppsz_options
, ":start-time=%"PRId64
,
300 i_start
/ CLOCK_FREQ
) != -1)
305 if( asprintf( ppsz_options
+ i_options
,
306 ":stop-time=%"PRId64
,
307 (i_start
+ i_duration
) / CLOCK_FREQ
) != -1)
311 /* Create the input item */
312 p_entry
= input_item_NewExt( psz_mrl
, psz_name
, i_duration
,
313 ITEM_TYPE_UNKNOWN
, ITEM_NET_UNKNOWN
);
314 if( p_entry
== NULL
)
317 input_item_AddOptions( p_entry
, i_options
,
318 (const char **)ppsz_options
,
319 VLC_INPUT_OPTION_TRUSTED
);
320 input_item_CopyOptions( p_entry
, p_current_input
);
322 /* Add the metadata */
324 input_item_SetTitle( p_entry
, psz_name
);
326 input_item_SetArtist( p_entry
, psz_artist
);
328 input_item_SetCopyright( p_entry
, psz_copyright
);
330 input_item_SetURL( p_entry
, psz_moreinfo
);
331 if( psz_description
)
332 input_item_SetDescription( p_entry
, psz_description
);
334 p_entry
->i_duration
= i_duration
;
336 input_item_node_AppendItem( p_subitems
, p_entry
);
338 input_item_Release( p_entry
);
/* Option strings were allocated by asprintf; free them from the last one
 * back (the enclosing loop is not visible in this listing). */
342 free( ppsz_options
[--i_options
] );
348 while( i_type
!= XML_READER_ENDELEM
|| strncasecmp( psz_node
, "ENTRY", 5 ) );
353 free( psz_copyright
);
354 free( psz_moreinfo
);
355 free( psz_description
);
358 /// this looks for patterns like &name; &#DEC; or &#xHEX;
/*
 * isXmlEncoded: inspect psz_str (asserted non-NULL) for XML entity syntax.
 *
 * Visible checks, in order:
 *  - strpbrk looks for any of the characters < > single quote or double
 *    quote;
 *  - an ampersand is located with strchr, then the next semicolon after it;
 *  - after the ampersand, a hash followed by x requires at least one
 *    character before the semicolon, each tested with isxdigit; a hash
 *    alone requires at least one character, each tested with isdigit;
 *    otherwise at least one character, each tested with isalnum.
 *
 * NOTE(review): the return statements, the enclosing loop and the updates
 * of is_escaped are not visible in this listing, so which outcome maps to
 * true or false cannot be confirmed from here.
 * NOTE(review): isxdigit, isdigit and isalnum receive a plain char; a
 * negative value is undefined behaviour. A cast to unsigned char would be
 * safer.
 */
359 static bool isXmlEncoded(const char* psz_str
)
361 assert( psz_str
!= NULL
);
362 //look for special characters
363 if( strpbrk(psz_str
, "<>'\"") != NULL
)
366 bool is_escaped
= false;
369 const char* psz_amp
= strchr(psz_str
, '&');
370 if( psz_amp
== NULL
)
372 const char* psz_end
= strchr(psz_amp
, ';');
373 if( psz_end
== NULL
)
376 else if(psz_amp
[1] == '#')
378 if( psz_amp
[2] == 'x' )
380 const char* psz_ptr
= &psz_amp
[3];
381 if( psz_ptr
== psz_end
)
383 for ( ; psz_ptr
< psz_end
; psz_ptr
++)
384 if( ! isxdigit( *psz_ptr
) )
389 const char* psz_ptr
= &(psz_amp
[2]);
390 if( psz_ptr
== psz_end
)
392 for ( ; psz_ptr
< psz_end
; psz_ptr
++)
393 if( ! isdigit( *psz_ptr
) )
399 const char* psz_ptr
= &(psz_amp
[1]);
400 if( psz_ptr
== psz_end
)
402 for ( ; psz_ptr
< psz_end
; psz_ptr
++)
403 if( ! isalnum( *psz_ptr
) )
/*
 * memstream_puts_xmlencoded: write the text starting at psz_begin to
 * p_stream, XML-encoding it unless isXmlEncoded accepts it as is.
 *
 * A temporary copy is made either with strdup (whole string) or with
 * strndup up to psz_end. The condition choosing between the two is not
 * visible here; callers in this file pass NULL as psz_end to mean the
 * rest of the string, so presumably that is the strdup case.
 * When encoding is needed the result of vlc_xml_encode is written and
 * freed.
 */
412 static void memstream_puts_xmlencoded(struct vlc_memstream
* p_stream
, const char* psz_begin
, const char* psz_end
)
414 char *psz_tmp
= NULL
;
416 psz_tmp
= strdup( psz_begin
);
418 psz_tmp
= strndup( psz_begin
, psz_end
- psz_begin
);
420 if ( psz_tmp
== NULL
)
423 if( isXmlEncoded( psz_tmp
) )
424 vlc_memstream_puts( p_stream
, psz_tmp
);
427 char *psz_tmp_encoded
= vlc_xml_encode( psz_tmp
);
428 if ( !psz_tmp_encoded
)
433 vlc_memstream_puts( p_stream
, psz_tmp_encoded
);
434 free( psz_tmp_encoded
);
440 * ASX is not required to be a strict XML document; this function will
441 * - make tags and attributes uppercase
442 * - escape strings when required
/*
 * ASXToXML: rewrite loosely formatted ASX text into a buffer built with a
 * vlc_memstream and return the pointer to that buffer.
 *
 * Visible behaviour:
 *  - text before the next opening angle bracket is emitted through
 *    memstream_puts_xmlencoded (the remainder of the input when no bracket
 *    is left);
 *  - a span starting with the XML comment opener is skipped up to the
 *    comment terminator found by strstr;
 *  - inside a tag, characters outside quoted strings are upper-cased with
 *    vlc_ascii_toupper, a closing bracket ends the tag, and a single or
 *    double quote starts a string whose delimiter is remembered;
 *  - the quoted content up to the matching delimiter goes through
 *    memstream_puts_xmlencoded and the delimiter is re-emitted.
 *
 * NOTE(review): the failure returns (stream open or close failing,
 * unterminated comment or string) are on lines missing from this listing.
 */
444 static char* ASXToXML( char* psz_source
)
446 bool b_in_string
= false;
447 char *psz_source_cur
= psz_source
;
448 char *psz_source_old
= psz_source
;
451 struct vlc_memstream stream_out
;
452 if( vlc_memstream_open( &stream_out
) != 0 )
455 while ( psz_source_cur
!= NULL
&& *psz_source_cur
!= '\0' )
457 psz_source_old
= psz_source_cur
;
459 if( ( psz_source_cur
= strchr( psz_source_cur
, '<' ) ) == NULL
)
461 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, NULL
);
462 //vlc_memstream_puts( &stream_out, psz_source_old );
466 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, psz_source_cur
);
467 psz_source_old
= psz_source_cur
;
469 //skip if comment, no need to copy them to the output.
470 if( strncmp( psz_source_cur
, "<!--", 4 ) == 0 )
473 psz_source_cur
= strstr( psz_source_cur
, "-->" );
474 if( psz_source_cur
== NULL
)
484 vlc_memstream_putc( &stream_out
, '<' );
488 for ( ; *psz_source_cur
!= '\0'; psz_source_cur
++ )
490 if( b_in_string
== false )
492 if( *psz_source_cur
== '>')
494 vlc_memstream_putc( &stream_out
, '>' );
498 if( *psz_source_cur
== '"' || *psz_source_cur
== '\'' )
500 c_string_delim
= *psz_source_cur
;
502 vlc_memstream_putc( &stream_out
, c_string_delim
);
506 //convert tag and attributes to upper case
507 vlc_memstream_putc( &stream_out
, vlc_ascii_toupper( *psz_source_cur
) );
/* Inside a quoted string: jump to the matching delimiter and emit the
 * content in between, encoded when necessary. */
512 psz_source_old
= psz_source_cur
;
513 psz_source_cur
= strchr( psz_source_cur
, c_string_delim
);
514 if( psz_source_cur
== NULL
)
517 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, psz_source_cur
);
518 vlc_memstream_putc( &stream_out
, c_string_delim
);
523 if( vlc_memstream_close( &stream_out
) != 0 )
526 return stream_out
.ptr
;
/*
 * detectXmlEncoding: extract the value of the encoding pseudo-attribute
 * from an XML prologue.
 *
 * Visible behaviour: leading whitespace is skipped, the text must start
 * with the XML declaration opener (case-insensitive), and the declaration
 * terminator is located with strstr. Up to that terminator, keyword and
 * quoted value pairs separated by an equals sign are scanned; for a
 * keyword matching "encoding" with a non-empty value, a strndup copy of
 * the value is returned.
 *
 * NOTE(review): the keyword comparison uses the length of the keyword
 * found in the input, so a keyword that is a proper prefix of "encoding",
 * or an empty keyword, compares equal as well. Comparing the length
 * against 8 first would make the match exact.
 * NOTE(review): the failure returns are on lines missing from this
 * listing.
 */
529 static char *detectXmlEncoding( const char *psz_xml
)
531 const char *psz_keyword_begin
= NULL
;
532 const char *psz_keyword_end
= NULL
;
534 const char *psz_value_begin
= NULL
;
535 const char *psz_value_end
= NULL
;
537 psz_xml
+= strspn( psz_xml
, " \n\r\t" );
538 if( strncasecmp( psz_xml
, "<?xml", 5 ) != 0 )
542 const char *psz_end
= strstr( psz_xml
, "?>" );
543 if( psz_end
== NULL
)
546 while( psz_xml
< psz_end
)
548 psz_keyword_begin
= psz_xml
= psz_xml
+ strspn( psz_xml
, " \n\r\t" );
549 if( *psz_xml
== '\0' )
551 psz_keyword_end
= psz_xml
= psz_xml
+ strcspn( psz_xml
, " \n\r\t=" );
552 if( *psz_xml
== '\0' )
555 psz_xml
+= strspn( psz_xml
, " \n\r\t" );
556 if( *psz_xml
!= '=' )
560 psz_xml
+= strspn( psz_xml
, " \n\r\t" );
561 char quote
= *psz_xml
;
562 if( quote
!= '"' && quote
!= '\'' )
565 psz_value_begin
= ++psz_xml
;
566 psz_value_end
= psz_xml
= strchr( psz_xml
, quote
);
567 if( psz_xml
== NULL
)
571 if( strncasecmp( psz_keyword_begin
, "encoding", psz_keyword_end
- psz_keyword_begin
) == 0
572 && ( psz_value_end
-psz_value_begin
) > 0 )
574 return strndup(psz_value_begin
, psz_value_end
-psz_value_begin
);
/*
 * PreparseStream: load the playlist into memory, normalise its encoding
 * and syntax, and expose the result as a new memory stream.
 *
 * Visible behaviour:
 *  - the size is queried with vlc_stream_GetSize and defaults to maxsize
 *    (1024 * 1024) when the query fails; larger streams are not converted;
 *  - the data is read with vlc_stream_Read in chunks of at most 1024 bytes
 *    and the buffer is NUL-terminated at i_read;
 *  - when detectXmlEncoding reports an encoding other than UTF-8, the text
 *    after the XML prologue is converted with FromCharset; without a
 *    declared encoding, text that is not valid UTF-8 goes through
 *    FromLocaleDup;
 *  - the text is passed through ASXToXML and wrapped with
 *    vlc_stream_MemoryNew.
 *
 * NOTE(review): the error returns and the buffer frees are on lines missing
 * from this listing.
 */
582 static stream_t
* PreparseStream( stream_t
*p_demux
)
584 stream_t
*s
= p_demux
->s
;
586 static const size_t maxsize
= 1024 * 1024;
588 if( vlc_stream_GetSize( s
, &streamSize
) != VLC_SUCCESS
)
589 streamSize
= maxsize
;
591 // Don't attempt to convert/store huge streams
592 if( streamSize
> maxsize
)
/* NOTE(review): precedence makes this streamSize + (1 * sizeof element),
 * not (streamSize + 1) * sizeof element. The two agree only because the
 * element type is char; parenthesising the sum would state the intent. */
594 char* psz_source
= malloc( streamSize
+ 1 * sizeof( *psz_source
) );
595 if ( unlikely( psz_source
== NULL
) )
600 ssize_t i_ret
= vlc_stream_Read( s
, psz_source
+ i_read
,
601 streamSize
> 1024 ? 1024 : streamSize
);
604 assert( (size_t)i_ret
<= streamSize
);
607 } while ( streamSize
> 0 );
608 psz_source
[i_read
] = 0;
611 char *encoding
= detectXmlEncoding( psz_source
);
612 if( encoding
!= NULL
)
614 if( strcasecmp( encoding
, "UTF-8" ) == 0 )
618 //strip xml prologue to avoid double conversion
/* The strstr result is used without a NULL check; this relies on
 * detectXmlEncoding having already found the prologue terminator
 * (presumably it returns NULL otherwise). */
619 char *tmp
= strstr( psz_source
, "?>" ) + 2;
620 tmp
= FromCharset( encoding
, tmp
, strlen( tmp
) );
628 else if( !IsUTF8( psz_source
) )
630 char *tmp
= FromLocaleDup( psz_source
);
637 char *psz_source_xml
= ASXToXML( psz_source
);
639 if( psz_source_xml
== NULL
)
642 stream_t
* p_stream
= vlc_stream_MemoryNew( p_demux
, (uint8_t*)psz_source_xml
, strlen(psz_source_xml
), false );
/*
 * ReadDir: parse the ASX document and fill p_subitems with the playlist
 * entries.
 *
 * Visible behaviour:
 *  - a NULL p_demux->psz_url is rejected, and the URL is duplicated into
 *    psz_base;
 *  - the stream is preprocessed by PreparseStream and an XML reader is
 *    created on the result (the fallback operand of the conditional is cut
 *    off in this listing);
 *  - nodes are read until an end element whose name starts with "ASX";
 *    a start element whose name starts with "ASX" is checked for at L831,
 *    and an "invalid root node" error is logged at L833 (the branch
 *    linking the two is not visible);
 *  - TITLE, AUTHOR, COPYRIGHT, MOREINFO and ABSTRACT update the metadata of
 *    the current input item; BASE replaces psz_base; ENTRYREF appends a new
 *    item built from its attribute value; ENTRY is delegated to
 *    ProcessEntry with psz_base as the MRL prefix.
 *
 * Element names are matched by prefix only.
 *
 * NOTE(review): braces, break statements, the declarations of i_type,
 * i_n_entry and psz_tmp, and the return statements are missing from this
 * listing.
 */
646 static int ReadDir( stream_t
*p_demux
, input_item_node_t
*p_subitems
)
648 if (unlikely(p_demux
->psz_url
== NULL
))
651 const char *psz_node
= NULL
;
652 char *psz_txt
= NULL
;
653 char *psz_base
= strdup( p_demux
->psz_url
);
654 if (unlikely(psz_base
== NULL
))
657 char *psz_title_asx
= NULL
;
658 char *psz_entryref
= NULL
;
660 xml_reader_t
*p_xml_reader
= NULL
;
661 input_item_t
*p_current_input
= GetCurrentItem( p_demux
);
662 stream_t
* p_stream
= PreparseStream( p_demux
);
664 bool b_first_node
= false;
668 p_xml_reader
= xml_ReaderCreate( p_demux
, p_stream
? p_stream
672 msg_Err( p_demux
, "Cannot parse ASX input file as XML");
678 i_type
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
679 if( i_type
== XML_READER_ERROR
)
682 if( i_type
== XML_READER_STARTELEM
)
686 if(!strncasecmp( psz_node
, "ASX", 3 ) )
690 msg_Err( p_demux
, "invalid root node" );
695 /* Metadata Node Handler */
696 if( !strncasecmp( psz_node
, "TITLE", 5 ) )
698 if( ! ReadElement( p_xml_reader
, &psz_title_asx
) )
700 input_item_SetTitle( p_current_input
, psz_title_asx
);
702 else if( !strncasecmp( psz_node
, "AUTHOR", 6 ) )
704 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
706 input_item_SetArtist( p_current_input
, psz_txt
);
708 else if( !strncasecmp( psz_node
, "COPYRIGHT", 9 ) )
710 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
712 input_item_SetCopyright( p_current_input
, psz_txt
);
714 else if( !strncasecmp( psz_node
, "MOREINFO", 8 ) )
/* Scan attributes until one whose name starts with HREF, or until the
 * attributes are exhausted. */
719 psz_tmp
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
721 while( psz_tmp
&& strncasecmp( psz_tmp
, "HREF", 4 ) );
723 if( !psz_tmp
) // If HREF attribute doesn't exist
725 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
729 psz_txt
= strdup( psz_node
);
731 vlc_xml_decode( psz_txt
);
732 input_item_SetURL( p_current_input
, psz_txt
);
734 else if( !strncasecmp( psz_node
, "ABSTRACT", 8 ) )
736 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
738 input_item_SetDescription( p_current_input
, psz_txt
);
741 /* Base Node handler */
742 if( !strncasecmp( psz_node
, "BASE", 4 ) )
744 if( ! ReadElement( p_xml_reader
, &psz_base
) )
748 /* Entry Ref Handler */
/* NOTE(review): two things look wrong in this handler.
 *  1. Only the first 7 characters of "ENTRYREF" are compared, although the
 *     keyword has 8.
 *  2. The attribute loop below continues while the attribute name DOES
 *     match HREF (note the negation), whereas every other HREF scan in
 *     this file continues while it does not match. As written the loop
 *     stops on the first attribute that is not HREF, so the value used for
 *     the new item is probably not the HREF value. Dropping the negation
 *     would align it with the MOREINFO handler above. */
749 if( !strncasecmp( psz_node
, "ENTRYREF", 7 ) )
754 psz_tmp
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
756 while( psz_tmp
&& !strncasecmp( psz_tmp
, "HREF", 4 ) );
760 /* Create new input item */
761 input_item_t
*p_input
;
762 psz_txt
= strdup( psz_node
);
763 vlc_xml_decode( psz_txt
);
764 p_input
= input_item_New( psz_txt
, psz_title_asx
);
765 input_item_CopyOptions( p_input
, p_current_input
);
766 input_item_node_AppendItem( p_subitems
, p_input
);
768 input_item_Release( p_input
);
772 if( !strncasecmp( psz_node
, "ENTRY", 5 ) )
774 ProcessEntry( &i_n_entry
, p_xml_reader
, p_subitems
,
775 p_current_input
, psz_base
);
777 /* FIXME Unsupported elements
786 while( i_type
!= XML_READER_ENDELEM
|| strncasecmp( psz_node
, "ASX", 3 ) );
790 free( psz_title_asx
);
791 free( psz_entryref
);
795 xml_ReaderDelete( p_xml_reader
);
797 vlc_stream_Delete( p_stream
);