1 /*****************************************************************************
2 * asx.c : ASX playlist format import
3 *****************************************************************************
4 * Copyright (C) 2005-2013 VLC authors and VideoLAN
6 * Authors: Derk-Jan Hartman <hartman at videolan dot org>
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
21 *****************************************************************************/
24 * http://msdn.microsoft.com/en-us/library/windows/desktop/dd564668.aspx
27 /*****************************************************************************
29 *****************************************************************************/
34 #include <vlc_common.h>
35 #include <vlc_access.h>
37 #include <vlc_strings.h>
38 #include <vlc_charset.h>
39 #include <vlc_memstream.h>
47 /*****************************************************************************
49 *****************************************************************************/
50 static int ReadDir( stream_t
*, input_item_node_t
* );
52 static bool ParseTime(xml_reader_t
*p_xml_reader
, vlc_tick_t
* pi_result
)
55 char *psz_value
= NULL
;
56 char *psz_start
= NULL
;
58 const char *psz_node
= NULL
;
59 const char *psz_txt
= NULL
;
61 int i_subfractions
= -1;
64 vlc_tick_t i_result
= 0;
68 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
70 while( psz_txt
&& strncasecmp( psz_txt
, "VALUE", 5 ) );
75 psz_value
= strdup( psz_node
);
76 psz_start
= psz_value
;
80 if( isdigit( *psz_value
) )
82 i_subresult
= i_subresult
* 10;
83 i_subresult
+= *psz_value
- '0';
84 if( i_subfractions
!= -1 )
87 else if( *psz_value
== ':' )
89 i_result
+= i_subresult
;
90 i_result
= i_result
* 60;
93 else if( *psz_value
== '.' )
96 i_result
+= i_subresult
;
102 if( i_subfractions
== -1)
103 i_result
+= i_subresult
;
105 /* Convert to microseconds */
106 if( i_subfractions
== -1)
108 while( i_subfractions
< 6 )
110 i_subresult
= i_subresult
* 10;
113 i_result
= i_result
* CLOCK_FREQ
;
114 if( i_subfractions
!= -1)
115 i_result
+= VLC_TICK_FROM_US( i_subresult
);
118 *pi_result
= i_result
;
122 static bool ReadElement( xml_reader_t
*p_xml_reader
, char **ppsz_txt
)
124 const char *psz_node
= NULL
;
126 /* Read the text node */
127 int ret
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
131 *ppsz_txt
= strdup( psz_node
);
132 vlc_xml_decode( *ppsz_txt
);
134 /* Read the end element */
135 xml_ReaderNextNode( p_xml_reader
, &psz_node
);
137 * Currently we don't check the agreement of start and end element
138 * This function is only used to read the element that cannot have child
139 * according to the reference.
144 static bool PeekASX( stream_t
*s
)
146 const uint8_t *p_peek
;
147 return ( vlc_stream_Peek( s
->s
, &p_peek
, 12 ) == 12
148 && !strncasecmp( (const char*) p_peek
, "<asx version", 12 ) );
151 /*****************************************************************************
152 * Import_ASX: main import function
153 *****************************************************************************/
155 int Import_ASX( vlc_object_t
*p_this
)
157 stream_t
*p_demux
= (stream_t
*)p_this
;
158 char *type
= stream_MimeType( p_demux
->s
);
160 if( stream_HasExtension( p_demux
, ".asx" )
161 || stream_HasExtension( p_demux
, ".wax" )
162 || stream_HasExtension( p_demux
, ".wvx" )
163 || (type
!= NULL
&& (strcasecmp(type
, "video/x-ms-asf") == 0
164 || strcasecmp(type
, "audio/x-ms-wax") == 0)
165 && PeekASX( p_demux
) ) )
167 msg_Dbg( p_demux
, "found valid ASX playlist" );
176 p_demux
->pf_control
= PlaylistControl
;
177 p_demux
->pf_readdir
= ReadDir
;
181 static void ProcessEntry( int *pi_n_entry
, xml_reader_t
*p_xml_reader
,
182 input_item_node_t
*p_subitems
,
183 input_item_t
*p_current_input
, char *psz_prefix
)
185 const char *psz_node
= NULL
;
186 const char *psz_txt
= NULL
;
189 char *psz_title
= NULL
;
190 char *psz_artist
= NULL
;
191 char *psz_copyright
= NULL
;
192 char *psz_moreinfo
= NULL
;
193 char *psz_description
= NULL
;
194 char *psz_name
= NULL
;
195 char *psz_mrl
= NULL
;
196 char *psz_href
= NULL
;
198 input_item_t
*p_entry
= NULL
;
201 vlc_tick_t i_start
= 0;
202 vlc_tick_t i_duration
;
203 char *ppsz_options
[2];
207 i_duration
= INPUT_DURATION_UNSET
;
208 i_type
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
210 if( i_type
== XML_READER_ERROR
|| i_type
== XML_READER_NONE
)
213 if( i_type
== XML_READER_STARTELEM
)
216 if( !strncasecmp( psz_node
, "TITLE", 5 ) )
218 if( !ReadElement( p_xml_reader
, &psz_title
) )
221 else if( !strncasecmp( psz_node
, "AUTHOR", 6 ) )
223 if( !ReadElement( p_xml_reader
, &psz_artist
) )
226 else if( !strncasecmp( psz_node
, "COPYRIGHT", 9 ) )
228 if( !ReadElement( p_xml_reader
, &psz_copyright
) )
231 else if( !strncasecmp( psz_node
,"MOREINFO", 8 ) )
235 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
237 while(psz_txt
&& strncasecmp( psz_txt
, "HREF", 4 ) );
241 if( !ReadElement( p_xml_reader
, &psz_moreinfo
) )
245 psz_moreinfo
= strdup( psz_node
);
246 vlc_xml_decode( psz_moreinfo
);
248 else if( !strncasecmp( psz_node
, "ABSTRACT", 8 ) )
250 if( !ReadElement( p_xml_reader
, &psz_description
) )
253 else if( !strncasecmp( psz_node
, "DURATION", 8 ) )
255 if( !ParseTime( p_xml_reader
, &i_duration
) )
258 else if( !strncasecmp( psz_node
, "STARTTIME", 9 ) )
260 if( !ParseTime( p_xml_reader
, &i_start
) )
264 /* All ref node will be converted into an entry */
265 else if( !strncasecmp( psz_node
, "REF", 3 ) )
267 *pi_n_entry
= *pi_n_entry
+ 1;
270 psz_title
= input_item_GetTitle( p_current_input
);
272 psz_artist
= input_item_GetArtist( p_current_input
);
274 psz_copyright
= input_item_GetCopyright( p_current_input
);
275 if( !psz_description
)
276 psz_description
= input_item_GetDescription( p_current_input
);
280 psz_txt
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
282 while( psz_txt
!= NULL
&& strncasecmp( psz_txt
, "HREF", 4) );
283 if( psz_txt
== NULL
)
285 psz_href
= strdup( psz_node
);
287 if( asprintf( &psz_name
, "%d. %s", *pi_n_entry
, psz_title
) == -1)
288 psz_name
= strdup( psz_title
);
289 vlc_xml_decode( psz_href
);
290 psz_mrl
= ProcessMRL( psz_href
, psz_prefix
);
292 /* Add Time information */
296 if( asprintf( ppsz_options
, ":start-time=%"PRId64
,
297 SEC_FROM_VLC_TICK(i_start
) ) != -1)
302 if( asprintf( ppsz_options
+ i_options
,
303 ":stop-time=%"PRId64
,
304 SEC_FROM_VLC_TICK(i_start
+ i_duration
) ) != -1)
308 /* Create the input item */
309 p_entry
= input_item_NewExt( psz_mrl
, psz_name
, i_duration
,
310 ITEM_TYPE_UNKNOWN
, ITEM_NET_UNKNOWN
);
311 if( p_entry
== NULL
)
314 input_item_AddOptions( p_entry
, i_options
,
315 (const char **)ppsz_options
,
316 VLC_INPUT_OPTION_TRUSTED
);
318 /* Add the metadata */
320 input_item_SetTitle( p_entry
, psz_name
);
322 input_item_SetArtist( p_entry
, psz_artist
);
324 input_item_SetCopyright( p_entry
, psz_copyright
);
326 input_item_SetURL( p_entry
, psz_moreinfo
);
327 if( psz_description
)
328 input_item_SetDescription( p_entry
, psz_description
);
330 p_entry
->i_duration
= i_duration
;
332 input_item_node_AppendItem( p_subitems
, p_entry
);
334 input_item_Release( p_entry
);
338 free( ppsz_options
[--i_options
] );
344 while( i_type
!= XML_READER_ENDELEM
|| strncasecmp( psz_node
, "ENTRY", 5 ) );
349 free( psz_copyright
);
350 free( psz_moreinfo
);
351 free( psz_description
);
/// Tells whether a string is already XML-escaped.
///
/// The string qualifies when it contains none of the characters < > ' "
/// and every '&' starts a well-formed reference: &name; &#DEC; or &#xHEX;
/// A string without any reference is reported as not encoded.
///
/// \param psz_str NUL-terminated string to inspect, must not be NULL
/// \return true if at least one reference was found and all are well-formed
static bool isXmlEncoded(const char* psz_str)
{
    assert( psz_str != NULL );
    //look for special characters
    if( strpbrk(psz_str, "<>'\"") != NULL )
        return false;

    bool is_escaped = false;
    for( ;; )
    {
        const char* psz_amp = strchr(psz_str, '&');
        if( psz_amp == NULL )
            break;
        const char* psz_end = strchr(psz_amp, ';');
        if( psz_end == NULL )
            return false;

        // select where the reference body starts and which character
        // class it must be made of
        const char* psz_ptr;
        int (*pf_is_valid)(int);
        if( psz_amp[1] != '#' )
        {
            psz_ptr = &psz_amp[1];      // &name;
            pf_is_valid = isalnum;
        }
        else if( psz_amp[2] == 'x' )
        {
            psz_ptr = &psz_amp[3];      // &#xHEX;
            pf_is_valid = isxdigit;
        }
        else
        {
            psz_ptr = &psz_amp[2];      // &#DEC;
            pf_is_valid = isdigit;
        }

        // an empty body (&; &#; &#x;) is not a reference
        if( psz_ptr == psz_end )
            return false;
        for( ; psz_ptr < psz_end; psz_ptr++ )
        {
            // <ctype.h> functions are only defined for unsigned char
            // values: bytes >= 0x80 must not be passed as negative ints
            if( !pf_is_valid( (unsigned char)*psz_ptr ) )
                return false;
        }

        is_escaped = true;
        psz_str = psz_end;
    }
    return is_escaped;
}
/* Appends the text [psz_begin, psz_end) to p_stream, or the whole string
 * starting at psz_begin when psz_end is NULL. Text that isXmlEncoded()
 * accepts is written as is, anything else goes through vlc_xml_encode()
 * first. Nothing is written if an allocation fails. */
static void memstream_puts_xmlencoded(struct vlc_memstream* p_stream, const char* psz_begin, const char* psz_end)
{
    /* Work on a NUL-terminated private copy of the range */
    char *psz_chunk = ( psz_end != NULL )
                    ? strndup( psz_begin, psz_end - psz_begin )
                    : strdup( psz_begin );
    if ( psz_chunk == NULL )
        return;

    if( !isXmlEncoded( psz_chunk ) )
    {
        char *psz_escaped = vlc_xml_encode( psz_chunk );
        if ( psz_escaped != NULL )
        {
            vlc_memstream_puts( p_stream, psz_escaped );
            free( psz_escaped );
        }
    }
    else
        vlc_memstream_puts( p_stream, psz_chunk );

    free( psz_chunk );
}
436 * ASX is not required to be a strict XML document, this function will
437 * - make tags and attributes uppercase
438 * - escape strings when required
440 static char* ASXToXML( char* psz_source
)
442 bool b_in_string
= false;
443 char *psz_source_cur
= psz_source
;
444 char *psz_source_old
= psz_source
;
447 struct vlc_memstream stream_out
;
448 if( vlc_memstream_open( &stream_out
) != 0 )
451 while ( psz_source_cur
!= NULL
&& *psz_source_cur
!= '\0' )
453 psz_source_old
= psz_source_cur
;
455 if( ( psz_source_cur
= strchr( psz_source_cur
, '<' ) ) == NULL
)
457 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, NULL
);
458 //vlc_memstream_puts( &stream_out, psz_source_old );
462 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, psz_source_cur
);
463 psz_source_old
= psz_source_cur
;
465 //skip if comment, no need to copy them to the ouput.
466 if( strncmp( psz_source_cur
, "<!--", 4 ) == 0 )
469 psz_source_cur
= strstr( psz_source_cur
, "-->" );
470 if( psz_source_cur
== NULL
)
480 vlc_memstream_putc( &stream_out
, '<' );
484 for ( ; *psz_source_cur
!= '\0'; psz_source_cur
++ )
486 if( b_in_string
== false )
488 if( *psz_source_cur
== '>')
490 vlc_memstream_putc( &stream_out
, '>' );
494 if( *psz_source_cur
== '"' || *psz_source_cur
== '\'' )
496 c_string_delim
= *psz_source_cur
;
498 vlc_memstream_putc( &stream_out
, c_string_delim
);
502 //convert tag and attributes to upper case
503 vlc_memstream_putc( &stream_out
, vlc_ascii_toupper( *psz_source_cur
) );
508 psz_source_old
= psz_source_cur
;
509 psz_source_cur
= strchr( psz_source_cur
, c_string_delim
);
510 if( psz_source_cur
== NULL
)
513 memstream_puts_xmlencoded(&stream_out
, psz_source_old
, psz_source_cur
);
514 vlc_memstream_putc( &stream_out
, c_string_delim
);
519 if( vlc_memstream_close( &stream_out
) != 0 )
522 return stream_out
.ptr
;
/**
 * Extracts the value of the "encoding" pseudo-attribute from the XML
 * declaration ("<?xml ... ?>") found at the start of a document.
 *
 * \param psz_xml NUL-terminated document text
 * \return a newly allocated copy of the encoding name (to be freed by the
 *         caller), or NULL when there is no XML declaration, the declaration
 *         is malformed, or it carries no non-empty encoding
 */
static char *detectXmlEncoding( const char *psz_xml )
{
    static const char psz_blank[] = " \n\r\t";
    static const char psz_wanted[] = "encoding";

    psz_xml += strspn( psz_xml, psz_blank );
    if( strncasecmp( psz_xml, "<?xml", 5 ) != 0 )
        return NULL;
    psz_xml += 5;

    const char *psz_end = strstr( psz_xml, "?>" );
    if( psz_end == NULL )
        return NULL;

    /* Walk the  keyword = "value"  pairs of the declaration */
    while( psz_xml < psz_end )
    {
        const char *psz_keyword_begin = psz_xml = psz_xml + strspn( psz_xml, psz_blank );
        if( *psz_xml == '\0' )
            return NULL;
        const char *psz_keyword_end = psz_xml = psz_xml + strcspn( psz_xml, " \n\r\t=" );
        if( *psz_xml == '\0' )
            return NULL;

        psz_xml += strspn( psz_xml, psz_blank );
        if( *psz_xml != '=' )
            return NULL;
        psz_xml++;

        psz_xml += strspn( psz_xml, psz_blank );
        char quote = *psz_xml;
        if( quote != '"' && quote != '\'' )
            return NULL;

        const char *psz_value_begin = ++psz_xml;
        const char *psz_value_end = psz_xml = strchr( psz_xml, quote );
        if( psz_xml == NULL )
            return NULL;
        psz_xml++;

        /* The keyword must be exactly "encoding": comparing only over the
         * keyword's own length would also accept any prefix of it ("enc",
         * "e", or even an empty keyword). */
        const size_t i_keyword_len = (size_t)( psz_keyword_end - psz_keyword_begin );
        const size_t i_value_len = (size_t)( psz_value_end - psz_value_begin );
        if( i_keyword_len == sizeof( psz_wanted ) - 1
         && strncasecmp( psz_keyword_begin, psz_wanted, i_keyword_len ) == 0
         && i_value_len > 0 )
        {
            return strndup( psz_value_begin, i_value_len );
        }
    }

    return NULL;
}
/* Buffers the source stream of p_demux in memory, brings the text to UTF-8
 * when needed, normalises it with ASXToXML() and exposes the result through
 * vlc_stream_MemoryNew(). The visible code gives up on streams larger than
 * maxsize, on allocation failure and when ASXToXML() returns NULL.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably NULL is returned on those paths -- confirm against the caller,
 * which tests the result before using it. */
578 static stream_t
* PreparseStream( stream_t
*p_demux
)
580 stream_t
*s
= p_demux
->s
;
/* Upper bound (1 MiB) on the amount of data buffered */
582 static const size_t maxsize
= 1024 * 1024;
/* When the stream size cannot be queried, maxsize is used as read budget */
584 if( vlc_stream_GetSize( s
, &streamSize
) != VLC_SUCCESS
)
585 streamSize
= maxsize
;
587 // Don't attempt to convert/store huge streams
588 if( streamSize
> maxsize
)
/* NOTE(review): this parses as streamSize + (1 * sizeof(*psz_source)); it
 * yields the intended size only because sizeof(char) is 1 --
 * (streamSize + 1) * sizeof(*psz_source) was presumably meant. */
590 char* psz_source
= malloc( streamSize
+ 1 * sizeof( *psz_source
) );
591 if ( unlikely( psz_source
== NULL
) )
/* Read in chunks of at most 1024 bytes, appending at offset i_read, until
 * the streamSize budget is used up */
596 ssize_t i_ret
= vlc_stream_Read( s
, psz_source
+ i_read
,
597 streamSize
> 1024 ? 1024 : streamSize
);
600 assert( (size_t)i_ret
<= streamSize
);
603 } while ( streamSize
> 0 );
/* NUL-terminate what was read so it can be handled as a C string */
604 psz_source
[i_read
] = 0;
/* Encoding declared in the XML prologue, if there is one */
607 char *encoding
= detectXmlEncoding( psz_source
);
608 if( encoding
!= NULL
)
610 if( strcasecmp( encoding
, "UTF-8" ) == 0 )
/* NOTE(review): strstr() is not checked against NULL here; presumably safe
 * because detectXmlEncoding() only reports an encoding after locating "?>"
 * -- confirm. */
614 //strip xml prologue to avoid double conversion
615 char *tmp
= strstr( psz_source
, "?>" ) + 2;
616 tmp
= FromCharset( encoding
, tmp
, strlen( tmp
) );
/* No declared encoding: text that is not valid UTF-8 goes through
 * FromLocaleDup() */
624 else if( !IsUTF8( psz_source
) )
626 char *tmp
= FromLocaleDup( psz_source
);
/* Turn the lenient ASX syntax into text an XML parser accepts */
633 char *psz_source_xml
= ASXToXML( psz_source
);
635 if( psz_source_xml
== NULL
)
/* Expose the converted text as an in-memory stream */
638 stream_t
* p_stream
= vlc_stream_MemoryNew( p_demux
, (uint8_t*)psz_source_xml
, strlen(psz_source_xml
), false );
642 static int ReadDir( stream_t
*p_demux
, input_item_node_t
*p_subitems
)
644 if (unlikely(p_demux
->psz_url
== NULL
))
647 const char *psz_node
= NULL
;
648 char *psz_txt
= NULL
;
649 char *psz_base
= strdup( p_demux
->psz_url
);
650 if (unlikely(psz_base
== NULL
))
653 char *psz_title_asx
= NULL
;
654 char *psz_entryref
= NULL
;
656 xml_reader_t
*p_xml_reader
= NULL
;
657 input_item_t
*p_current_input
= GetCurrentItem( p_demux
);
658 stream_t
* p_stream
= PreparseStream( p_demux
);
660 bool b_first_node
= false;
664 p_xml_reader
= xml_ReaderCreate( p_demux
, p_stream
? p_stream
668 msg_Err( p_demux
, "Cannot parse ASX input file as XML");
674 i_type
= xml_ReaderNextNode( p_xml_reader
, &psz_node
);
675 if( i_type
== XML_READER_ERROR
)
678 if( i_type
== XML_READER_STARTELEM
)
682 if(!strncasecmp( psz_node
, "ASX", 3 ) )
686 msg_Err( p_demux
, "invalid root node" );
691 /* Metadata Node Handler */
692 if( !strncasecmp( psz_node
, "TITLE", 5 ) )
694 if( ! ReadElement( p_xml_reader
, &psz_title_asx
) )
696 input_item_SetTitle( p_current_input
, psz_title_asx
);
698 else if( !strncasecmp( psz_node
, "AUTHOR", 6 ) )
700 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
702 input_item_SetArtist( p_current_input
, psz_txt
);
704 else if( !strncasecmp( psz_node
, "COPYRIGHT", 9 ) )
706 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
708 input_item_SetCopyright( p_current_input
, psz_txt
);
710 else if( !strncasecmp( psz_node
, "MOREINFO", 8 ) )
715 psz_tmp
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
717 while( psz_tmp
&& strncasecmp( psz_tmp
, "HREF", 4 ) );
719 if( !psz_tmp
) // If HREF attribute doesn't exist
721 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
725 psz_txt
= strdup( psz_node
);
727 vlc_xml_decode( psz_txt
);
728 input_item_SetURL( p_current_input
, psz_txt
);
730 else if( !strncasecmp( psz_node
, "ABSTRACT", 8 ) )
732 if( ! ReadElement( p_xml_reader
, &psz_txt
) )
734 input_item_SetDescription( p_current_input
, psz_txt
);
737 /* Base Node handler */
738 if( !strncasecmp( psz_node
, "BASE", 4 ) )
740 if( ! ReadElement( p_xml_reader
, &psz_base
) )
744 /* Entry Ref Handler */
745 if( !strncasecmp( psz_node
, "ENTRYREF", 7 ) )
750 psz_tmp
= xml_ReaderNextAttr( p_xml_reader
, &psz_node
);
752 while( psz_tmp
&& !strncasecmp( psz_tmp
, "HREF", 4 ) );
756 /* Create new input item */
757 input_item_t
*p_input
;
758 psz_txt
= strdup( psz_node
);
759 vlc_xml_decode( psz_txt
);
760 p_input
= input_item_New( psz_txt
, psz_title_asx
);
761 input_item_node_AppendItem( p_subitems
, p_input
);
763 input_item_Release( p_input
);
767 if( !strncasecmp( psz_node
, "ENTRY", 5 ) )
769 ProcessEntry( &i_n_entry
, p_xml_reader
, p_subitems
,
770 p_current_input
, psz_base
);
772 /* FIXME Unsupported elements
781 while( i_type
!= XML_READER_ENDELEM
|| strncasecmp( psz_node
, "ASX", 3 ) );
785 free( psz_title_asx
);
786 free( psz_entryref
);
790 xml_ReaderDelete( p_xml_reader
);
792 vlc_stream_Delete( p_stream
);