Tweak: dash replacement if gstitle.
[clive.git] / lib / clive / Video.pm
blob7f6f94692615bb7233d8a7fec49e3e0f0c429219
1 # -*- coding: ascii -*-
2 ###########################################################################
3 # clive, command line video extraction utility.
4 # Copyright 2007, 2008, 2009 Toni Gundogdu.
6 # This file is part of clive.
8 # clive is free software: you can redistribute it and/or modify it under
9 # the terms of the GNU General Public License as published by the Free
10 # Software Foundation, either version 3 of the License, or (at your option)
11 # any later version.
13 # clive is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 # details.
18 # You should have received a copy of the GNU General Public License along
19 # with this program. If not, see <http://www.gnu.org/licenses/>.
20 ###########################################################################
21 package clive::Video;
23 use warnings;
24 use strict;
26 use Carp;
27 use POSIX;
28 use File::Basename qw(basename);
29 use File::Spec::Functions;
30 use Cwd qw(getcwd);
32 use clive::Util;
34 our $AUTOLOAD;
# Constructor.
#
# Returns a hash-based object blessed into $class. Every known field starts
# out undefined. The same set of field names is also stored (as a hash
# reference) under the `_permitted' key, which AUTOLOAD consults to decide
# which accessor names are valid.
sub new {
    my ($class) = @_;

    my @names = qw(
        page_link      video_id     file_length  content_type
        file_suffix    video_link   video_host   video_format
        base_filename  filename     initial_length
        time_stamp     nothing_todo
    );
    my %fields = map { ( $_ => undef ) } @names;

    return bless( { _permitted => \%fields, %fields }, $class );
}
# Getter/setter for the page title.
#
#   $video->page_title                    returns the stored title
#   $video->page_title($content)          parses <title> out of $content,
#                                         stores it and tidies it up with
#                                         _cleanupTitle
#   $video->page_title($content, $title)  stores $title verbatim (no parsing,
#                                         no cleanup)
#
# Always returns the title stored in the object after the call.
#
# NOTE(review): HTML::TokeParser->new treats a plain scalar as a file name
# and a scalar reference as the document text -- confirm that callers pass
# $content as a reference.
sub page_title {
    my ( $self, @args ) = @_;

    return $self->{page_title} unless @args;

    my ( $content, $title ) = @args;

    if ($title) {
        $self->{page_title} = $title;
    }
    else {
        # Loaded lazily: only needed when the title has to be scraped.
        require HTML::TokeParser;
        my $parser = HTML::TokeParser->new($content);
        $parser->get_tag("title");
        $self->{page_title} = $parser->get_trimmed_text;
        _cleanupTitle($self);
    }

    return $self->{page_title};
}
# Write a one-line summary of the video to the log: the base filename, the
# file length converted by clive::Util::toMB (printed with one decimal and
# an "M" suffix) and the content type.
sub printVideo {
    my ($self) = @_;

    my @details = (
        $self->{base_filename},
        clive::Util::toMB( $self->{file_length} ),
        $self->{content_type},
    );

    clive::Log->instance->out( sprintf( "file: %s %.1fM [%s]\n", @details ) );
}
# Write the video details to the log as a single line of the form
#
#   csv:"<base_filename>","<file_length>","<video_link>"
#
# Each value is read through its accessor and wrapped in double quotes.
# A double quote inside a value is doubled so that the quoted field stays
# well formed; an undefined value is emitted as an empty field.
sub emitCSV {
    my $self = shift;

    my @cells;
    for my $field (qw(base_filename file_length video_link)) {
        my $value = $self->$field;
        $value = '' unless defined $value;
        $value =~ s/"/""/g;
        push @cells, qq/"$value"/;
    }

    clive::Log->instance->out( "csv:" . join( ',', @cells ) . "\n" );
}
# Decide where the video will be saved and how much of it is already there.
#
# Reads the configuration from clive::Config and sets these fields:
#
#   filename        full path of the output file
#   base_filename   last path component of `filename'
#   initial_length  length of an existing file to continue from; reset to 0
#                   unless the `continue' option is set
#   nothing_todo    set to 1 when the existing file already has the
#                   expected `file_length'
#
# Without the `output_file' option the name is built from the
# `filename_format' option (default "%t.%s"), in which
#
#   %t  page title, reduced to the characters matching the `cclass' option
#       (default \w) unless `no_cclass' is set
#   %s  file suffix
#   %i  video id
#   %h  video host
#
# and placed in `save_dir' (default: the current directory). If a file of
# that name exists with a different length and `continue' is not set, the
# numeric suffixes .1, .2, ... are tried in turn.
#
# With `output_file' that path is used as is.
#
# NOTE(review): clive::Util::fileExists is assumed to return the length of
# the file, or 0 when it does not exist -- confirm against clive::Util.
sub formatOutputFilename {
    my $self = shift;

    my $config = clive::Config->instance->config;
    my $fname;

    if ( !$config->{output_file} ) {

        # Apply character-class.
        my $title  = $self->{page_title};
        my $cclass = $config->{cclass} || qr|\w|;

        $title = join( '', $self->{page_title} =~ /$cclass/g )
            if ( !$config->{no_cclass} );

        # Format output filename.
        $fname = $config->{filename_format} || "%t.%s";

        # Fall back to the video id when the title came out empty and the
        # format would not otherwise contain the id.
        if ( !$title && $fname !~ /%i/ ) {
            $title = $self->{video_id};
            $title =~ s/-/_/g;
        }

        # Expand every specifier in one pass, so that text inserted for one
        # specifier (e.g. a title containing "%s") is never mistaken for
        # another specifier.
        my %value_for = (
            t => $title,
            s => $self->{file_suffix},
            i => $self->{video_id},
            h => $self->{video_host},
        );
        $fname =~ s/%([tsih])/$value_for{$1}/g;

        $fname = catfile( $config->{save_dir} || getcwd(), $fname );

        my $tmp = $fname;

        for ( my $i = 1; $i < 9999; ++$i ) {
            $self->{initial_length} = clive::Util::fileExists($fname);

            if ( $self->{initial_length} == 0 ) {

                # Name is free.
                last;
            }
            elsif ( $self->{initial_length} == $self->{file_length} ) {

                # Complete copy already present.
                $self->{nothing_todo} = 1;
                last;
            }
            elsif ( $config->{continue} ) {

                # Partial copy: keep the name so the transfer can resume.
                last;
            }

            $fname = "$tmp.$i";
        }
    }
    else {

        # The name is fixed by the user, so it is recorded even when there
        # is nothing left to download. fileExists is called as a plain
        # function, the same way as above, so that the path (and not the
        # class name) is its first argument.
        $fname = $config->{output_file};
        $self->{initial_length} = clive::Util::fileExists($fname);

        if ( $self->{initial_length} == $self->{file_length} ) {
            $self->{nothing_todo} = 1;
        }
    }

    if ( !$config->{continue} ) {
        $self->{initial_length} = 0;
    }

    $self->{base_filename} = basename($fname);
    $self->{filename}      = $fname;
}
# Populate the object from a cache record.
#
# $record is a hash reference; each of the fields listed below is copied
# from it into the object, after which the title is tidied up with
# _cleanupTitle.
sub fromCacheRecord {
    my $self   = shift;
    my $record = shift;

    # No need to keep order in sync with clive::Video::toCacheRecord
    # or clive::Cache::_mapRecord -- just make sure each item gets
    # set here.
    my @cached = qw(
        page_title  page_link    video_id
        video_link  video_host   video_format
        file_length file_suffix  content_type
        time_stamp
    );
    $self->{$_} = $record->{$_} for @cached;

    _cleanupTitle($self);
}
# Serialize the object into a single cache record string.
#
# The record consists of the fields below joined with '#', followed by the
# current local time as "YYYY-MM-DD HH:MM:SS". Any '#' in the page title is
# removed first; this modifies the title stored in the object as well.
#
# NOTE(review): only the title is stripped of '#'. A '#' in any other field
# (a URL fragment in page_link, for instance) would presumably shift the
# columns when the record is split again -- verify against
# clive::Cache::_mapRecord.
sub toCacheRecord {
    my $self = shift;

    # Should really remove all '#' from the strings
    # before storing them. Living on the edge.
    $self->{page_title} =~ tr{#}//d;

    # Keep the order in sync with clive::Cache::_mapRecord.
    my @columns = (
        $self->{page_title},
        $self->{page_link},
        $self->{video_id},
        $self->{video_link},
        $self->{video_host},
        $self->{video_format},
        $self->{file_length},
        $self->{file_suffix},
        $self->{content_type},

        # time_stamp. Spelled out with ANSI C specifiers only: %F and %T
        # give the same text but are not available in every C library.
        POSIX::strftime( "%Y-%m-%d %H:%M:%S", localtime ),
    );

    return join( "#", @columns );
}
# Tidy up the stored page title in place.
#
# Removes the listed site names (and the word "video") wherever they occur,
# case-insensitively, then strips leading dashes/whitespace and trailing
# whitespace. An undefined title is left untouched. Returns the resulting
# title.
sub _cleanupTitle {
    my $self  = shift;
    my $title = $self->{page_title};

    return $title unless defined $title;

    # The dots are escaped so that only a literal "." matches.
    $title =~ s/(?:youtube|video|liveleak\.com|sevenload|dailymotion)//gi;
    $title =~ s/(?:cctv\.com|redtube)//gi;

    $title =~ s/^[-\s]+//;
    $title =~ s/\s+$//;

    $self->{page_title} = $title;
}
# Generic accessor for the fields listed in the object's `_permitted' hash.
#
#   $video->field           returns the current value
#   $video->field($value)   stores $value and returns it
#
# Croaks when called on something that is not an object, or for a name that
# is not a permitted field. DESTROY is ignored: the class defines no
# destructor, so Perl routes object destruction through AUTOLOAD, and that
# must not be reported as an attempt to access an unknown field.
sub AUTOLOAD {
    my $self = shift;

    our $AUTOLOAD;
    my $name = $AUTOLOAD;
    $name =~ s/.*://;    # strip the package qualifier

    return if $name eq 'DESTROY';

    my $type = ref($self)
        or croak("$self is not an object");

    unless ( exists( $self->{_permitted}->{$name} ) ) {
        croak("cannot access `$name' field in class $type");
    }

    return $self->{$name} = shift if @_;
    return $self->{$name};
}
248 # Barefoot servants too.