_cleanupTitle: remove obsolete strings from array
[clive.git] / lib / clive / Video.pm
blob206e6dbceef79d9a19b37bd52b478f6c41adb4b4
1 # -*- coding: ascii -*-
2 ###########################################################################
3 # clive, command line video extraction utility.
5 # Copyright 2009 Toni Gundogdu.
7 # This file is part of clive.
9 # clive is free software: you can redistribute it and/or modify it under
10 # the terms of the GNU General Public License as published by the Free
11 # Software Foundation, either version 3 of the License, or (at your option)
12 # any later version.
14 # clive is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
17 # details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
22 package clive::Video;
24 use warnings;
25 use strict;
27 use Carp;
28 use POSIX;
29 use File::Basename qw(basename);
30 use File::Spec::Functions;
31 use Cwd qw(getcwd);
32 use Encode qw(decode_utf8);
34 use clive::Util;
36 our $AUTOLOAD;
# Constructor: returns a new object blessed into the invoking class.
#
# Every accessor field starts out undefined. The field table is stored
# twice in the object: by reference under `_permitted' (which AUTOLOAD
# consults to decide which accessors may be used) and flattened into the
# object itself as the initial values.
sub new {
    my ($class) = @_;

    my @field_names = qw(
        page_link
        video_id
        file_length
        content_type
        file_suffix
        video_link
        video_host
        video_format
        base_filename
        filename
        initial_length
        time_stamp
        nothing_todo
    );
    my %fields = map { $_ => undef } @field_names;

    return bless( { _permitted => \%fields, %fields }, $class );
}
# Combined getter/setter for the page title.
#
# page_title()                  - returns the stored title.
# page_title($content)          - extracts the text of the first <title>
#                                 tag from $content with HTML::TokeParser,
#                                 then strips site names via _cleanupTitle.
# page_title($content, $title)  - when $title is true, $content is ignored
#                                 and $title is stored after being passed
#                                 through clive::Util::fromEntities.
#
# NOTE(review): $content is handed to HTML::TokeParser->new unchanged, so
# callers presumably pass a reference to the page source -- confirm.
#
# Always returns the (possibly updated) stored title.
sub page_title {
    my ( $self, @args ) = @_;

    # Plain getter call: nothing to update.
    return $self->{page_title} unless @args;

    my ( $content, $title ) = @args;

    if ($title) {
        $self->{page_title} = clive::Util::fromEntities($title);
    }
    else {
        require HTML::TokeParser;
        my $parser = HTML::TokeParser->new($content);
        $parser->get_tag("title");
        $self->{page_title} = $parser->get_trimmed_text;
        _cleanupTitle($self);
    }

    return $self->{page_title};
}
# Writes a one-line summary of the video to the log: the base filename,
# the file length converted by clive::Util::toMB (printed with one
# decimal), and the content type.
sub printVideo {
    my ($self) = @_;

    my @values = (
        $self->{base_filename},
        clive::Util::toMB( $self->{file_length} ),
        $self->{content_type},
    );
    my $line = sprintf( "file: %s %.1fM [%s]\n", @values );

    return clive::Log->instance->out($line);
}
# Writes the video as a single CSV-style line to the log:
#
#   csv:"<base_filename>","<file_length>","<video_link>"
#
# Each value is fetched through its accessor method and wrapped in double
# quotes; no escaping is applied to the values themselves.
sub emitCSV {
    my ($self) = @_;

    # Loaded here as in the original code, although nothing from
    # URI::Escape is called in this sub.
    require URI::Escape;

    my @quoted = map { sprintf( qq/"%s"/, $self->$_ ) }
        qw(base_filename file_length video_link);
    my $line = "csv:" . join( ",", @quoted );

    return clive::Log->instance->out("$line\n");
}
# Works out the output path for the video and records resume state.
#
# Parameters:
#   $suffix - fallback file suffix, used only when the object has no
#             file_suffix of its own.
#   $index  - prepended to $suffix as "$index.$suffix" in that fallback.
#
# Sets on the object:
#   filename       - full output path.
#   base_filename  - last path component of filename.
#   initial_length - value reported by clive::Util::fileExists for the
#                    chosen path; reset to 0 unless the `continue' option
#                    is set. (Presumably the size of an existing file and
#                    0 when absent -- confirm in clive::Util.)
#   nothing_todo   - set to 1 when initial_length equals file_length.
#
# Without the `output_file' option the name is built from the
# `filename_format' option (default "%t.%s"), where
#   %t = page title (filtered through the character class unless the
#        `no_cclass' option is set), %s = suffix, %i = video id,
#   %h = video host,
# and placed in `save_dir' or the current directory.
#
# Returns the chosen path.
sub formatOutputFilename {
    my ( $self, $suffix, $index ) = @_;

    my $config = clive::Config->instance->config;
    my $fname;

    if ( !$config->{output_file} ) {

        # Apply character-class: keep only the characters it matches.
        my $title  = $self->{page_title};
        my $cclass = $config->{cclass} || qr|\w|;

        $title = join( '', $self->{page_title} =~ /$cclass/g )
            if ( !$config->{no_cclass} );

        # Format output filename.
        $fname = $config->{filename_format} || "%t.%s";

        my $id = $self->{video_id};

        # Fall back to the video id when the title is empty, unless the
        # format already contains the id.
        $title = $id
            if ( !$title && $fname !~ /%i/ );

        $fname =~ s/%t/$title/;
        if ( $self->{file_suffix} ) {
            $fname =~ s/%s/$self->{file_suffix}/;
        }
        elsif ($suffix) {
            my $indexed_suffix = "$index.$suffix";
            $fname =~ s/%s/$indexed_suffix/;
        }
        $fname =~ s/%i/$id/;
        $fname =~ s/%h/$self->{video_host}/;

        my $path = Encode::decode_utf8( $config->{save_dir} or getcwd() );

        $fname = catfile( $path, $fname );
        my $base = $fname;

        # Find a usable name: stop at the first candidate that does not
        # exist, is already complete, or may be continued; otherwise try
        # "$base.1", "$base.2", ...
        for ( my $i = 1; $i < 9999; ++$i ) {
            $self->{initial_length} = clive::Util::fileExists($fname);

            if ( $self->{initial_length} == 0 ) {
                last;
            }
            elsif ( $self->{initial_length} == $self->{file_length} ) {
                $self->{nothing_todo} = 1;
                last;
            }
            else {
                if ( $config->{continue} ) {
                    last;
                }
            }
            $fname = "$base.$i";
        }
    }
    else {

        # Always use the requested name, including when the file is
        # already complete; otherwise basename() below would be handed
        # an undefined path and `filename' would never be set.
        $fname = $config->{output_file};

        # Plain function call, as in the loop above; a method-style call
        # would pass the class name as the path to check.
        $self->{initial_length} = clive::Util::fileExists($fname);

        if ( $self->{initial_length} == $self->{file_length} ) {
            $self->{nothing_todo} = 1;
        }
    }

    if ( !$config->{continue} ) {
        $self->{initial_length} = 0;
    }

    $self->{base_filename} = basename($fname);
    $self->{filename}      = $fname;
}
# Populates the object from a cache record (a hash reference keyed by
# field name).
#
# The order of the fields does not need to match
# clive::Video::toCacheRecord or clive::Cache::_mapRecord; what matters
# is that every field is set here. The page title is passed through
# decode_utf8 before being stored and is then cleaned by _cleanupTitle;
# all other fields are copied unchanged.
sub fromCacheRecord {
    my ( $self, $record ) = @_;

    $self->{page_title} = decode_utf8( $record->{page_title} );

    my @copied_fields = qw(
        page_link
        video_id
        video_link
        video_host
        video_format
        file_length
        file_suffix
        content_type
        time_stamp
    );
    for my $field (@copied_fields) {
        $self->{$field} = $record->{$field};
    }

    return _cleanupTitle($self);
}
# Serialises the object into a single cache record: the field values
# joined with "#", followed by the current local time as the time stamp.
#
# Side effect: every "#" is removed from the stored page title.
# Returns the record string.
sub toCacheRecord {
    my ($self) = @_;

    # Should really remove all '#' from the other strings too before
    # storing them, since '#' is the separator. Living on the edge.
    $self->{page_title} =~ tr/#//d;

    # Disabled workaround, kept for reference.
    # See: http://code.google.com/p/clive/issues/detail?id=42
    #
    #   if ( sprintf( "%vd", $^V ) =~ /(\d+).(\d+).(\d+)/ ) {
    #       $title = decode_utf8($title)
    #           if ( $1 >= 5 && $2 >= 10 );
    #   }

    # Keep the order in sync with clive::Cache::_mapRecord.
    my @columns = (
        $self->{page_title},
        @{$self}{
            qw(page_link video_id video_link video_host
                video_format file_length file_suffix content_type)
        },
        POSIX::strftime( "%F %T", localtime ),    # time_stamp
    );

    return join( "#", @columns );
}
# Strips video-site names and filler words from the stored page title.
#
# Removes (case-insensitively) the names "youtube", "liveleak.com",
# "dailymotion", "on vimeo", "clipfish" and "funny hub", then the words
# "video"/"videos", then any leading dashes/whitespace and trailing
# whitespace. The result is written back to $self->{page_title}.
#
# Returns the cleaned title; an undefined title is left untouched.
sub _cleanupTitle {
    my $self  = shift;
    my $title = $self->{page_title};

    # Nothing to clean; also avoids "uninitialized value" warnings.
    return $self->{page_title} unless defined $title;

    # All site names live in one alternation. A pattern that starts with
    # "|" has an empty first alternative which matches at every position,
    # so the names following it would never be removed.
    $title =~ s/youtube|liveleak\.com|dailymotion|on vimeo|clipfish|funny hub//gi;
    $title =~ s/videos?//gi;
    $title =~ s/^[-\s]+//;
    $title =~ s/\s+$//;

    return $self->{page_title} = $title;
}
# Generic accessor: handles every method call that has no explicit sub.
#
# The method name must be a key of the object's `_permitted' table.
#   $obj->field         - returns the current value.
#   $obj->field($value) - stores $value and returns it.
#
# Croaks when invoked on something that is not an object, or when the
# requested field is not permitted.
sub AUTOLOAD {
    our $AUTOLOAD;

    my $self = shift;
    my $type = ref($self)
        or croak("$self is not an object");

    # Strip the package qualification from the called method name.
    my $name = $AUTOLOAD;
    $name =~ s/.*://;

    # No DESTROY method is defined, so perl routes object destruction
    # through AUTOLOAD as well. It is not a field access: ignore it
    # instead of croaking during cleanup.
    return if $name eq 'DESTROY';

    unless ( exists( $self->{_permitted}->{$name} ) ) {
        croak("cannot access `$name' field in class $type");
    }

    if (@_) {
        return $self->{$name} = shift;
    }
    else {
        return $self->{$name};
    }
}
269 # Barefoot servants too.