1 # -*- coding: ascii -*-
2 ###########################################################################
3 # clive, command line video extraction utility.
5 # Copyright 2009 Toni Gundogdu.
7 # This file is part of clive.
9 # clive is free software: you can redistribute it and/or modify it under
10 # the terms of the GNU General Public License as published by the Free
11 # Software Foundation, either version 3 of the License, or (at your option)
14 # clive is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19 # You should have received a copy of the GNU General Public License along
20 # with this program. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
29 use File
::Basename
qw(basename);
30 use File
::Spec
::Functions
;
32 use Encode
qw(decode_utf8);
# Fragment: attribute-table entries followed by the tail of a constructor.
# The declarations that enclose these lines are not visible in this copy,
# so the notes below describe only what is present.
#
# Each visible entry maps an attribute name to undef, i.e. the attribute
# has no initial value.
44 content_type
=> undef,
48 video_format
=> undef,
49 base_filename
=> undef,
51 initial_length
=> undef,
53 nothing_todo
=> undef,
# _permitted stores a reference to %fields. The accessor code further down
# this file looks a name up in $self->{_permitted} before it reads or
# writes the attribute of that name.
56 _permitted
=> \
%fields,
# Bless $self into $class and return the new object.
59 return bless( $self, $class );
# Fragment of a routine that sets and returns $self->{page_title}. Its `sub`
# line, the line that obtains $self, and the branching between the two
# assignments below are not visible in this copy.
#
# Arguments (after $self): $content, which is handed to HTML::TokeParser,
# and $title, which is handed to clive::Util::fromEntities.
65 my ( $content, $title ) = @_;
# HTML::TokeParser is loaded here, at call time, with `require`.
67 require HTML
::TokeParser
;
68 my $p = HTML
::TokeParser
->new($content);
# Title taken from the parser's trimmed text. Any call that first moves the
# parser to a particular tag is not visible here.
70 $self->{page_title
} = $p->get_trimmed_text;
# Title taken from the caller-supplied $title.
# NOTE(review): fromEntities presumably decodes HTML entities -- confirm
# against clive::Util.
74 $self->{page_title
} = clive
::Util
::fromEntities
($title);
# The stored title is also the return value.
77 return $self->{page_title
};
# Fragment of a routine that logs a one-line summary of the file. Its `sub`
# line, the call that owns the argument list below, and the argument that
# fills the trailing "[%s]" are not visible in this copy.
#
# Format: "file: <base_filename> <size>M [<third value>]".
83 "file: %s %.1fM [%s]\n",
84 $self->{base_filename
},
# file_length goes through clive::Util::toMB to feed the "%.1fM" slot.
85 clive
::Util
::toMB
( $self->{file_length
} ),
# $str is handed to clive::Log->instance->out.
# NOTE(review): $str is presumably the result of formatting the arguments
# above; the assignment itself is not visible here.
88 clive
::Log
->instance->out($str);
# Fragment of a routine that writes selected attributes as a single line of
# double-quoted values, each followed by a comma. Its `sub` line and the
# initialisation of $str are not visible in this copy.
96 my @fields = qw(base_filename file_length video_link);
# $self->$_ calls the method named after each listed field and appends the
# result as "value", to $str.
# NOTE(review): values are inserted as-is, so a double quote inside a value
# would end the quoting early -- confirm whether consumers of this output
# care.
99 $str .= sprintf( qq/"%s",/, $self->$_ ) foreach (@fields);
# The line, with a trailing newline, goes to clive::Log->instance->out.
102 clive
::Log
->instance->out("$str\n");
# formatOutputFilename( $suffix, $index )
#
# Works out where the video will be saved and records the outcome on the
# object: initial_length (the value clive::Util::fileExists reports for the
# chosen path), nothing_todo (set to 1 when that value equals file_length),
# base_filename and filename.
#
# $suffix and $index are used only when the object has no file_suffix; they
# are then combined as "$index.$suffix" to fill the %s placeholder.
#
# When the configuration has no output_file, the name is built from
# filename_format (default "%t.%s") with %t, %s and %h expanded, and placed
# under save_dir (or the current directory). Otherwise output_file is used
# exactly as given.
#
# NOTE(review): this copy of the routine has lines missing (closing braces,
# `else` arms, loop exits, and the statement that owns the dangling postfix
# `if` below). No declaration of $fname is visible either. The comments
# describe only what can be seen.
105 sub formatOutputFilename
{
106 my ( $self, $suffix, $index ) = @_;
108 my $config = clive
::Config
->instance->config;
# No explicit output file configured: derive a name from the page title.
111 if ( !$config->{output_file
} ) {
113 # Apply character-class.
114 my $title = $self->{page_title
};
# Unless no_cclass is set, keep only the parts of the title that match
# cclass (default: single word characters) and join them back together.
115 my $cclass = $config->{cclass
} || qr
|\w
|;
117 $title = join( '', $self->{page_title
} =~ /$cclass/g )
118 if ( !$config->{no_cclass
} );
120 # Format output filename.
121 $fname = $config->{filename_format
} || "%t.%s";
123 my $id = $self->{video_id
};
# NOTE(review): the statement this postfix condition belongs to is not
# visible in this copy.
128 if ( !$title && $fname !~ /%i/ );
130 $fname =~ s/%t/$title/;
# %s becomes the known file suffix, or "$index.$suffix" when there is none.
131 if ( $self->{file_suffix
} ) {
132 $fname =~ s/%s/$self->{file_suffix}/;
135 my $tmp = "$index.$suffix";
136 $fname =~ s/%s/$tmp/;
139 $fname =~ s/%h/$self->{video_host}/;
# NOTE(review): second `my $config`; it fetches the same configuration that
# was already read at the top of the routine and looks redundant.
141 my $config = clive
::Config
->instance->config;
# Target directory: save_dir when configured, else the current directory.
142 my $path = Encode
::decode_utf8
( $config->{save_dir
} or getcwd
);
144 $fname = catfile
( $path, $fname );
# Probe the candidate path. How the loop exits and how $fname changes
# between iterations is not visible in this copy.
147 for ( my $i = 1; $i < 9999; ++$i ) {
148 $self->{initial_length
} = clive
::Util
::fileExists
($fname);
150 if ( $self->{initial_length
} == 0 ) {
153 elsif ( $self->{initial_length
} == $self->{file_length
} ) {
154 $self->{nothing_todo
} = 1;
158 if ( $config->{continue} ) {
# Explicit output file: probe it directly.
# fileExists is called as a plain function here, exactly like the call in
# the loop above. The earlier `clive::Util->fileExists(...)` form was a
# method call, which passes the string "clive::Util" as the first argument,
# so the configured path was never the one being checked.
166 $self->{initial_length
}
167 = clive
::Util
::fileExists( $config->{output_file
} );
168 if ( $self->{initial_length
} == $self->{file_length
} ) {
169 $self->{nothing_todo
} = 1;
172 $fname = $config->{output_file
};
# Without the continue option the recorded initial length is reset to 0.
176 if ( !$config->{continue} ) {
177 $self->{initial_length
} = 0;
# Publish the chosen path and its final component.
180 $self->{base_filename
} = basename
($fname);
181 $self->{filename
} = $fname;
# fromCacheRecord( $record )
#
# Fills this object from a cache record. $record is used as a hash
# reference, and ten attributes are copied from it by name: page_title,
# page_link, video_id, video_link, video_host, video_format, file_length,
# file_suffix, content_type and time_stamp.
#
# Only page_title is passed through decode_utf8; every other value is
# copied unchanged. The routine finishes by calling _cleanupTitle with the
# object as its argument.
#
# NOTE(review): the closing brace of this routine and the end of the
# comment sentence below are not visible in this copy.
184 sub fromCacheRecord
{
185 my ( $self, $record ) = @_;
187 # No need to keep order in sync with clive::Video::toCacheRecord
188 # or clive::Cache::_mapRecord -- just make sure each item gets
190 $self->{page_title
} = decode_utf8
( $$record{page_title
} );
191 $self->{page_link
} = $$record{page_link
};
192 $self->{video_id
} = $$record{video_id
};
193 $self->{video_link
} = $$record{video_link
};
194 $self->{video_host
} = $$record{video_host
};
195 $self->{video_format
} = $$record{video_format
};
196 $self->{file_length
} = $$record{file_length
};
197 $self->{file_suffix
} = $$record{file_suffix
};
198 $self->{content_type
} = $$record{content_type
};
199 $self->{time_stamp
} = $$record{time_stamp
};
# Called as a plain function, not as a method.
201 _cleanupTitle
($self);
# Fragment of the routine that serialises the object into a '#'-separated
# cache record (presumably toCacheRecord, which another comment in this
# file refers to). Its `sub` line, the start of the concatenation and the
# return are not visible in this copy.
207 # Should really remove all '#' from the strings
208 # before storing them. Living on the edge.
# '#' is the field separator used below, so it is deleted from page_title.
# This changes the attribute on the object itself, not just a copy.
209 $self->{page_title
} =~ tr
{#}//d;
211 my $title = $self->{page_title
};
214 # See: http://code.google.com/p/clive/issues/detail?id=42
# The title is decoded only when the running Perl reports major >= 5 and
# minor >= 10.
# NOTE(review): the dots in the pattern are unescaped and match any
# character, and the test would be false for a major version above 5 with a
# minor below 10. A single numeric comparison against $] would say the same
# thing more directly -- confirm the intent before changing it.
215 if ( sprintf( "%vd", $^V ) =~ /(\d+).(\d+).(\d+)/ ) {
216 $title = decode_utf8($title)
217 if ( $1 >= 5 && $2 >= 10 );
221 # Keep the order in sync with clive::Cache::_mapRecord.
# The first field of the record is not visible here (presumably $title).
# NOTE(review): "%F %T" relies on the platform's strftime supporting those
# conversions -- confirm on every supported platform.
224 . $self->{page_link
} . "#"
225 . $self->{video_id
} . "#"
226 . $self->{video_link
} . "#"
227 . $self->{video_host
} . "#"
228 . $self->{video_format
} . "#"
229 . $self->{file_length
} . "#"
230 . $self->{file_suffix
} . "#"
231 . $self->{content_type
} . "#"
232 . POSIX
::strftime
( "%F %T", localtime ) # time_stamp
# Fragment of the title clean-up step (presumably _cleanupTitle, which
# fromCacheRecord calls). Its `sub` line and any statements between the last
# substitution and the final assignment are not visible in this copy.
#
# Works on a copy of page_title and writes the cleaned text back at the end.
239 my $title = $self->{page_title
};
# Remove site names, case-insensitively, wherever they occur. The dots in
# the two domain names are escaped so that only a literal "." matches; an
# unescaped dot would also strip text such as "cctv com" or "liveleakXcom".
241 $title =~ s/youtube|liveleak\.com|dailymotion|on vimeo//gi;
242 $title =~ s/cctv\.com|redtube|ehrensenf|clipfish|funny hub//gi;
# Remove every "video" / "videos".
243 $title =~ s/video(s?)//gi;
# Drop leading dashes and whitespace left behind by the removals above.
244 $title =~ s/^[-\s]+//;
247 $self->{page_title
} = $title;
# Fragment of the AUTOLOAD-based attribute accessor. Its `sub` line, the
# line that obtains $self, and the condition that chooses between the two
# returns below are not visible in this copy.
#
# Croaks when the invocant is not a reference, or when the requested name is
# not a key of $self->{_permitted}.
252 my $type = ref($self)
253 or croak
("$self is not an object");
# Perl sets $AUTOLOAD to the fully qualified name of the method that was
# called.
# NOTE(review): any line that strips the package prefix from $name is not
# visible here; the lookup below needs the bare attribute name.
254 my $name = $AUTOLOAD;
256 unless ( exists( $self->{_permitted
}->{$name} ) ) {
257 croak
("cannot access `$name' field in class $type");
# Setter form: store the next argument and return it.
260 return $self->{$name} = shift;
# Getter form: return the current value.
263 return $self->{$name};
269 # Barefoot servants too.