4 # Copyright 2008 Tamil s.a.r.l.
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
35 'verbose' => \
$verbose,
41 pod2usage
( -verbose
=> 2 );
45 usage
() if $help || !$conf;
50 print "Reading configuration file: $conf\n" if $verbose;
52 @clouds = LoadFile
( $conf );
54 croak
"Unable to read configuration file: $conf\n" if $@
;
56 for my $cloud ( @clouds ) {
57 print "Create a cloud\n",
58 " Koha conf file: ", $cloud->{KohaConf
} ?
$cloud->{KohaConf
} : "default", "\n",
59 " Zebra Index: ", $cloud->{ZebraIndex
}, "\n",
60 " Koha Keyword: ", $cloud->{KohaIndex
}, "\n",
61 " Count: ", $cloud->{Count
}, "\n",
62 " Withcss: ", $cloud->{Withcss
}, "\n",
63 " Output: ", $cloud->{Output
}, "\n",
66 # Set Koha context if KohaConf is present
67 my $set_new_context = 0;
68 if ( $cloud->{KohaConf
} ) {
69 if ( -e
$cloud->{KohaConf
} ) {
70 my $context = C4
::Context
->new( $cloud->{KohaConf
} );
71 $context->set_context();
75 carp
"Koha conf file doesn't exist: ", $cloud->{KohaConf
}, " ; use KOHA_CONF\n";
79 my $index = new ZebraIndex
( $cloud->{ZebraIndex
} );
80 $index->scan( $cloud->{Count
} );
82 open my $fh, ">", $cloud->{Output
}
83 or croak
"Unable to create file ", $cloud->{Output
};
85 my $withcss = $cloud->{Withcss
} =~ /^yes/i;
86 print $fh $index->html_cloud( $cloud->{KohaIndex
}, $withcss );
88 $set_new_context && restore_context C4
::Context
;
103 $self->{ zebra_index
} = shift;
104 $self->{ top_terms
} = undef;
105 $self->{ levels_cloud
} = 24;
109 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
111 my $ss = $zbiblio->scan_pqf(
112 '@attr 1=' . $self->{ zebra_index
} . ' @attr 4=1 @attr 6=3 "a"'
115 croak
"Invalid Zebra index: ", $self->{ zebra_index
} if $@
;
123 # Scan zebra index and populate an array of top terms
126 # $max_terms Max number of top terms
129 # A 4-dimensionnal array in $self->{top_terms}
131 # [1] term number of occurrences
132 # [2] term proportional relative weight in terms set E[0-1]
133 # [3] term logarithmic relative weight E [0-levels_cloud]
135 # This array is sorted alphabetically by terms ([0])
136 # It can be easily sorted by occurrences:
137 # @t = sort { $a[1] <=> $a[1] } @{$self->{top_terms}};
141 my $index_name = $self->{ zebra_index
};
142 my $max_terms = shift;
144 my $MAX_OCCURENCE = 1000000000;
146 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
147 my $number_of_terms = 0;
148 my @terms; # 2 dimensions array
149 my $min_occurence_index = -1;
156 print "$from\n" if $verbose;
157 $from =~ s/\"/\\\"/g;
158 my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
160 $ss = $zbiblio->scan_pqf( $query );
166 $ss->option( rpnCharset
=> 'UTF-8' );
167 last if $ss->size() == 0;
170 for my $index ( 0..$ss->size()-1 ) {
171 ($term, $occ) = $ss->display_term($index);
172 #print "$term:$occ\n";
173 if ( $number_of_terms < $max_terms ) {
174 push( @terms, [ $term, $occ ] );
176 if ( $number_of_terms == $max_terms ) {
177 $min_occurence = $MAX_OCCURENCE;
178 for (0..$number_of_terms-1) {
179 my @term = @
{ $terms[$_] };
180 if ( $term[1] <= $min_occurence ) {
181 $min_occurence = $term[1];
182 $min_occurence_index = $_;
188 if ( $occ > $min_occurence) {
189 @
{ $terms[$min_occurence_index] }[0] = $term;
190 @
{ $terms[$min_occurence_index] }[1] = $occ;
191 $min_occurence = $MAX_OCCURENCE;
192 for (0..$max_terms-1) {
193 my @term = @
{ $terms[$_] };
194 if ( $term[1] <= $min_occurence ) {
195 $min_occurence = $term[1];
196 $min_occurence_index = $_;
205 # Sort array of array by terms weight
206 @terms = sort { @
{$a}[1] <=> @
{$b}[1] } @terms;
208 # A relatif weight to other set terms is added to each term
209 my $min = $terms[0][1];
210 my $log_min = log( $min );
211 my $max = $terms[$#terms][1];
212 my $log_max = log( $max );
213 my $delta = $max - $min;
214 $delta = 1 if $delta == 0; # Very unlikely
216 if ($log_max - $log_min == 0) {
217 $log_min = $log_min - $self->{levels_cloud
};
221 $factor = $self->{levels_cloud
} / ($log_max - $log_min);
224 foreach (0..$#terms) {
225 my $count = @
{ $terms[$_] }[1];
226 my $weight = ( $count - $min ) / $delta;
227 my $log_weight = int( (log($count) - $log_min) * $factor);
228 push( @
{ $terms[$_] }, $weight );
229 push( @
{ $terms[$_] }, $log_weight );
231 $self->{ top_terms
} = \
@terms;
233 # Sort array of array by terms alphabetical order
234 @terms = sort { @
{$a}[0] cmp @
{$b}[0] } @terms;
239 # Returns a HTML version of index top terms formatted
244 my $koha_index = shift;
246 my @terms = @
{ $self->{top_terms
} };
259 font-weight: lighter;
260 text-decoration: none;
262 span.tagcloud0 { font-size: 12px;}
263 span.tagcloud1 { font-size: 13px;}
264 span.tagcloud2 { font-size: 14px;}
265 span.tagcloud3 { font-size: 15px;}
266 span.tagcloud4 { font-size: 16px;}
267 span.tagcloud5 { font-size: 17px;}
268 span.tagcloud6 { font-size: 18px;}
269 span.tagcloud7 { font-size: 19px;}
270 span.tagcloud8 { font-size: 20px;}
271 span.tagcloud9 { font-size: 21px;}
272 span.tagcloud10 { font-size: 22px;}
273 span.tagcloud11 { font-size: 23px;}
274 span.tagcloud12 { font-size: 24px;}
275 span.tagcloud13 { font-size: 25px;}
276 span.tagcloud14 { font-size: 26px;}
277 span.tagcloud15 { font-size: 27px;}
278 span.tagcloud16 { font-size: 28px;}
279 span.tagcloud17 { font-size: 29px;}
280 span.tagcloud18 { font-size: 30px;}
281 span.tagcloud19 { font-size: 31px;}
282 span.tagcloud20 { font-size: 32px;}
283 span.tagcloud21 { font-size: 33px;}
284 span.tagcloud22 { font-size: 34px;}
285 span.tagcloud23 { font-size: 35px;}
286 span.tagcloud24 { font-size: 36px;}
288 <div class="subjectcloud">
292 my @term = @
{ $terms[$_] };
295 #print " 0=", $term[0]," - 1=", $term[1], " - 2=", $term[2], " - 3=", $term[3],"\n";
297 . '<span class="tagcloud'
300 . '<a href="/cgi-bin/koha/opac-search.pl?q='
315 cloud-kw.pl - Creates HTML keywords clouds from Koha Zebra Indexes
321 =item cloud-kw.pl [--verbose|--help] --conf=F<cloud.conf>
323 Creates multiple HTML files containing kewords cloud with top terms sorted
324 by their logarithmic weight.
325 F<cloud.conf> is a YAML configuration file driving cloud generation
334 =item B<--conf=configuration file>
336 Specify configuration file name
338 =item B<--verbose|-v>
340 Enable script verbose mode.
344 Print this help page.
350 Configuration file looks like that:
353 # Koha configuration file for a specific installation
354 # If not present, defaults to KOHA_CONF
355 KohaConf: /home/koha/mylibray/etc/koha-conf.xml
356 # Zebra index to scan
358 # Koha index used to link found kewords with an opac search URL
360 # Number of top keyword to use for the cloud
362 # Include CSS style directives with the cloud
363 # This could be used as a model and then CSS directives are
364 # put in the appropriate CSS file directly.
366 # HTML file where to output the cloud
367 Output: /home/koha/mylibrary/koharoot/koha-tmpl/cloud-author.html
369 KohaConf: /home/koha/yourlibray/etc/koha-conf.xml
374 Output: /home/koha/yourlibrary/koharoot/koha-tmpl/cloud-subject.html
378 Generated top terms have more informations than those outputted from
379 the time being. Some parameters could be easily added to improve
386 In order to output terms with the number of occurrences they
387 have been found in Koha Catalogue by Zebra.
391 Number of levels in the cloud. Now 24 levels are hardcoded.
395 Weighting method used to distribute terms in the cloud. We could have two
396 values: Logarithmic and Linear. Now it's Logarithmic by default.
400 Now terms are outputted in the lexical order. They could be sorted