Replace dashes ("-") with underscores ("_") in video ids.
[clive.git] / lib / clive / Video.pm
blob2e3dec8208c2fdd7de626149a2d7d4288038cd05
1 # -*- coding: ascii -*-
2 ###########################################################################
3 # clive, command line video extraction utility.
4 # Copyright 2007, 2008, 2009 Toni Gundogdu.
6 # This file is part of clive.
8 # clive is free software: you can redistribute it and/or modify it under
9 # the terms of the GNU General Public License as published by the Free
10 # Software Foundation, either version 3 of the License, or (at your option)
11 # any later version.
13 # clive is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 # details.
18 # You should have received a copy of the GNU General Public License along
19 # with this program. If not, see <http://www.gnu.org/licenses/>.
20 ###########################################################################
21 package clive::Video;
23 use warnings;
24 use strict;
26 use Carp;
27 use POSIX;
28 use File::Basename qw(basename);
29 use File::Spec::Functions;
30 use Cwd qw(getcwd);
31 use Encode qw(decode_utf8);
33 use clive::Util;
35 our $AUTOLOAD;
37 sub new {
38 my $class = shift;
39 my %fields = (
40 page_link => undef,
41 video_id => undef,
42 file_length => undef,
43 content_type => undef,
44 file_suffix => undef,
45 video_link => undef,
46 video_host => undef,
47 video_format => undef,
48 base_filename => undef,
49 filename => undef,
50 initial_length => undef,
51 time_stamp => undef,
52 nothing_todo => undef,
54 my $self = {
55 _permitted => \%fields,
56 %fields,
58 return bless( $self, $class );
61 sub page_title {
62 my $self = shift;
63 if (@_) {
64 my ( $content, $title ) = @_;
65 if ( !$title ) {
66 require HTML::TokeParser;
67 my $p = HTML::TokeParser->new($content);
68 $p->get_tag("title");
69 $self->{page_title} = $p->get_trimmed_text;
70 _cleanupTitle($self);
72 else {
73 $self->{page_title} = $title;
76 return $self->{page_title};
79 sub printVideo {
80 my $self = shift;
81 my $str = sprintf(
82 "file: %s %.1fM [%s]\n",
83 $self->{base_filename},
84 clive::Util::toMB( $self->{file_length} ),
85 $self->{content_type}
87 clive::Log->instance->out($str);
90 sub emitCSV {
91 my $self = shift;
93 require URI::Escape;
95 my @fields = qw(base_filename file_length video_link);
97 my $str = "csv:";
98 $str .= sprintf( qq/"%s",/, $self->$_ ) foreach (@fields);
99 $str =~ s/,$//;
101 clive::Log->instance->out("$str\n");
104 sub formatOutputFilename {
105 my $self = shift;
107 my $config = clive::Config->instance->config;
108 my $fname;
110 if ( !$config->{output_file} ) {
112 # Apply character-class.
113 my $title = $self->{page_title};
114 my $cclass = $config->{cclass} || qr|\w|;
116 $title = join( '', $self->{page_title} =~ /$cclass/g )
117 if ( !$config->{no_cclass} );
119 # Format output filename.
120 $fname = $config->{filename_format} || "%t.%s";
122 my $id = $self->{video_id};
123 $id =~ s/-/_/g;
125 $title = $id
126 if ( !$title && $fname !~ /%i/ );
128 $fname =~ s/%t/$title/;
129 $fname =~ s/%s/$self->{file_suffix}/;
130 $fname =~ s/%i/$id/;
131 $fname =~ s/%h/$self->{video_host}/;
133 my $config = clive::Config->instance->config;
134 $fname = catfile( $config->{save_dir} || getcwd, $fname );
136 my $tmp = $fname;
138 for ( my $i = 1; $i < 9999; ++$i ) {
139 $self->{initial_length} = clive::Util::fileExists($fname);
141 if ( $self->{initial_length} == 0 ) {
142 last;
144 elsif ( $self->{initial_length} == $self->{file_length} ) {
145 $self->{nothing_todo} = 1;
146 last;
148 else {
149 if ( $config->{continue} ) {
150 last;
153 $fname = "$tmp.$i";
156 else {
157 $self->{initial_length}
158 = clive::Util->fileExists( $config->{output_file} );
159 if ( $self->{initial_length} == $self->{file_length} ) {
160 $self->{nothing_todo} = 1;
162 else {
163 $fname = $config->{output_file};
167 if ( !$config->{continue} ) {
168 $self->{initial_length} = 0;
171 $self->{base_filename} = basename($fname);
172 $self->{filename} = $fname;
175 sub fromCacheRecord {
176 my ( $self, $record ) = @_;
178 # No need to keep order in sync with clive::Video::toCacheRecord
179 # or clive::Cache::_mapRecord -- just make sure each item gets
180 # set here.
181 $self->{page_title} = decode_utf8( $$record{page_title} );
182 $self->{page_link} = $$record{page_link};
183 $self->{video_id} = $$record{video_id};
184 $self->{video_link} = $$record{video_link};
185 $self->{video_host} = $$record{video_host};
186 $self->{video_format} = $$record{video_format};
187 $self->{file_length} = $$record{file_length};
188 $self->{file_suffix} = $$record{file_suffix};
189 $self->{content_type} = $$record{content_type};
190 $self->{time_stamp} = $$record{time_stamp};
192 _cleanupTitle($self);
195 sub toCacheRecord {
196 my $self = shift;
198 # Should really remove all '#' from the strings
199 # before storing them. Living on the edge.
200 $self->{page_title} =~ tr{#}//d;
201 my $title = decode_utf8( $self->{page_title} );
203 # Keep the order in sync with clive::Cache::_mapRecord.
204 my $record
205 = $title . "#"
206 . $self->{page_link} . "#"
207 . $self->{video_id} . "#"
208 . $self->{video_link} . "#"
209 . $self->{video_host} . "#"
210 . $self->{video_format} . "#"
211 . $self->{file_length} . "#"
212 . $self->{file_suffix} . "#"
213 . $self->{content_type} . "#"
214 . POSIX::strftime( "%F %T", localtime ) # time_stamp
216 return $record;
219 sub _cleanupTitle {
220 my $self = shift;
221 my $title = $self->{page_title};
223 $title =~ s/(youtube|liveleak.com|sevenload|dailymotion)//gi;
224 $title =~ s/(cctv.com|redtube|ehrensenf|clipfish|funny hub)//gi;
225 $title =~ s/video[s]//gi;
226 $title =~ s/^[-\s]+//;
227 $title =~ s/\s+$//;
229 $self->{page_title} = $title;
232 sub AUTOLOAD {
233 my $self = shift;
234 my $type = ref($self)
235 or croak("$self is not an object");
236 my $name = $AUTOLOAD;
237 $name =~ s/.*://;
238 unless ( exists( $self->{_permitted}->{$name} ) ) {
239 croak("cannot access `$name' field in class $type");
241 if (@_) {
242 return $self->{$name} = shift;
244 else {
245 return $self->{$name};
251 # Barefoot servants too.