Fix: vimeo: title parsing with accented chars.
[clive.git] / lib / clive / Video.pm
blob9f6ef52c8cf8bf97a5df527c260a0194bd4280ed
1 # -*- coding: ascii -*-
2 ###########################################################################
3 # clive, command line video extraction utility.
5 # Copyright 2009 Toni Gundogdu.
7 # This file is part of clive.
9 # clive is free software: you can redistribute it and/or modify it under
10 # the terms of the GNU General Public License as published by the Free
11 # Software Foundation, either version 3 of the License, or (at your option)
12 # any later version.
14 # clive is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
17 # details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
22 package clive::Video;
24 use warnings;
25 use strict;
27 use Carp;
28 use POSIX;
29 use File::Basename qw(basename);
30 use File::Spec::Functions;
31 use Cwd qw(getcwd);
32 use Encode qw(decode_utf8);
34 use clive::Util;
36 our $AUTOLOAD;
38 sub new {
39 my $class = shift;
40 my %fields = (
41 page_link => undef,
42 video_id => undef,
43 file_length => undef,
44 content_type => undef,
45 file_suffix => undef,
46 video_link => undef,
47 video_host => undef,
48 video_format => undef,
49 base_filename => undef,
50 filename => undef,
51 initial_length => undef,
52 time_stamp => undef,
53 nothing_todo => undef,
55 my $self = {
56 _permitted => \%fields,
57 %fields,
59 return bless( $self, $class );
62 sub page_title {
63 my $self = shift;
64 if (@_) {
65 my ( $content, $title ) = @_;
66 if ( !$title ) {
67 require HTML::TokeParser;
68 my $p = HTML::TokeParser->new($content);
69 $p->get_tag("title");
70 $self->{page_title} = $p->get_trimmed_text;
71 _cleanupTitle($self);
73 else {
74 $self->{page_title} = $title;
77 return $self->{page_title};
80 sub printVideo {
81 my $self = shift;
82 my $str = sprintf(
83 "file: %s %.1fM [%s]\n",
84 $self->{base_filename},
85 clive::Util::toMB( $self->{file_length} ),
86 $self->{content_type}
88 clive::Log->instance->out($str);
91 sub emitCSV {
92 my $self = shift;
94 require URI::Escape;
96 my @fields = qw(base_filename file_length video_link);
98 my $str = "csv:";
99 $str .= sprintf( qq/"%s",/, $self->$_ ) foreach (@fields);
100 $str =~ s/,$//;
102 clive::Log->instance->out("$str\n");
105 sub formatOutputFilename {
106 my $self = shift;
108 my $config = clive::Config->instance->config;
109 my $fname;
111 if ( !$config->{output_file} ) {
113 # Apply character-class.
114 my $title = $self->{page_title};
115 my $cclass = $config->{cclass} || qr|\w|;
117 $title = join( '', $self->{page_title} =~ /$cclass/g )
118 if ( !$config->{no_cclass} );
120 # Format output filename.
121 $fname = $config->{filename_format} || "%t.%s";
123 my $id = $self->{video_id};
124 $id =~ s/-/_/g;
126 $title = $id
127 if ( !$title && $fname !~ /%i/ );
129 $fname =~ s/%t/$title/;
130 $fname =~ s/%s/$self->{file_suffix}/;
131 $fname =~ s/%i/$id/;
132 $fname =~ s/%h/$self->{video_host}/;
134 my $config = clive::Config->instance->config;
135 $fname = catfile( $config->{save_dir} || getcwd, $fname );
137 my $tmp = $fname;
139 for ( my $i = 1; $i < 9999; ++$i ) {
140 $self->{initial_length} = clive::Util::fileExists($fname);
142 if ( $self->{initial_length} == 0 ) {
143 last;
145 elsif ( $self->{initial_length} == $self->{file_length} ) {
146 $self->{nothing_todo} = 1;
147 last;
149 else {
150 if ( $config->{continue} ) {
151 last;
154 $fname = "$tmp.$i";
157 else {
158 $self->{initial_length}
159 = clive::Util->fileExists( $config->{output_file} );
160 if ( $self->{initial_length} == $self->{file_length} ) {
161 $self->{nothing_todo} = 1;
163 else {
164 $fname = $config->{output_file};
168 if ( !$config->{continue} ) {
169 $self->{initial_length} = 0;
172 $self->{base_filename} = basename($fname);
173 $self->{filename} = $fname;
176 sub fromCacheRecord {
177 my ( $self, $record ) = @_;
179 # No need to keep order in sync with clive::Video::toCacheRecord
180 # or clive::Cache::_mapRecord -- just make sure each item gets
181 # set here.
182 $self->{page_title} = decode_utf8( $$record{page_title} );
183 $self->{page_link} = $$record{page_link};
184 $self->{video_id} = $$record{video_id};
185 $self->{video_link} = $$record{video_link};
186 $self->{video_host} = $$record{video_host};
187 $self->{video_format} = $$record{video_format};
188 $self->{file_length} = $$record{file_length};
189 $self->{file_suffix} = $$record{file_suffix};
190 $self->{content_type} = $$record{content_type};
191 $self->{time_stamp} = $$record{time_stamp};
193 _cleanupTitle($self);
196 sub toCacheRecord {
197 my $self = shift;
199 # Should really remove all '#' from the strings
200 # before storing them. Living on the edge.
201 $self->{page_title} =~ tr{#}//d;
202 my $title = decode_utf8( $self->{page_title} );
204 # Keep the order in sync with clive::Cache::_mapRecord.
205 my $record
206 = $title . "#"
207 . $self->{page_link} . "#"
208 . $self->{video_id} . "#"
209 . $self->{video_link} . "#"
210 . $self->{video_host} . "#"
211 . $self->{video_format} . "#"
212 . $self->{file_length} . "#"
213 . $self->{file_suffix} . "#"
214 . $self->{content_type} . "#"
215 . POSIX::strftime( "%F %T", localtime ) # time_stamp
217 return $record;
220 sub _cleanupTitle {
221 my $self = shift;
222 my $title = $self->{page_title};
224 $title =~ s/youtube|liveleak.com|sevenload|dailymotion|on vimeo//gi;
225 $title =~ s/cctv.com|redtube|ehrensenf|clipfish|funny hub//gi;
226 $title =~ s/video(s?)//gi;
227 $title =~ s/^[-\s]+//;
228 $title =~ s/\s+$//;
230 $self->{page_title} = $title;
233 sub AUTOLOAD {
234 my $self = shift;
235 my $type = ref($self)
236 or croak("$self is not an object");
237 my $name = $AUTOLOAD;
238 $name =~ s/.*://;
239 unless ( exists( $self->{_permitted}->{$name} ) ) {
240 croak("cannot access `$name' field in class $type");
242 if (@_) {
243 return $self->{$name} = shift;
245 else {
246 return $self->{$name};
252 # Barefoot servants too.