4 # Copyright 2008 Tamil s.a.r.l.
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any later
13 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with Koha; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 'verbose' => \
$verbose,
41 pod2usage
( -verbose
=> 2 );
45 usage
() if $help || !$conf;
49 print "Reading configuration file: $conf\n" if $verbose;
51 @clouds = LoadFile
( $conf );
53 croak
"Unable to read configuration file: $conf\n" if $@
;
55 for my $cloud ( @clouds ) {
56 print "Create a cloud\n",
57 " Koha conf file: ", $cloud->{KohaConf
} ?
$cloud->{KohaConf
} : "default", "\n",
58 " Zebra Index: ", $cloud->{ZebraIndex
}, "\n",
59 " Koha Keyword: ", $cloud->{KohaIndex
}, "\n",
60 " Count: ", $cloud->{Count
}, "\n",
61 " Withcss: ", $cloud->{Withcss
}, "\n",
62 " Output: ", $cloud->{Output
}, "\n",
65 # Set Koha context if KohaConf is present
66 my $set_new_context = 0;
67 if ( $cloud->{KohaConf
} ) {
68 if ( -e
$cloud->{KohaConf
} ) {
69 my $context = C4
::Context
->new( $cloud->{KohaConf
} );
70 $context->set_context();
74 carp
"Koha conf file doesn't exist: ", $cloud->{KohaConf
}, " ; use KOHA_CONF\n";
78 my $index = new ZebraIndex
( $cloud->{ZebraIndex
} );
79 $index->scan( $cloud->{Count
} );
81 open my $fh, ">", $cloud->{Output
}
82 or croak
"Unable to create file ", $cloud->{Output
};
84 my $withcss = $cloud->{Withcss
} =~ /^yes/i;
85 print $fh $index->html_cloud( $cloud->{KohaIndex
}, $withcss );
87 $set_new_context && restore_context C4
::Context
;
102 $self->{ zebra_index
} = shift;
103 $self->{ top_terms
} = undef;
104 $self->{ levels_cloud
} = 24;
108 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
110 my $ss = $zbiblio->scan_pqf(
111 '@attr 1=' . $self->{ zebra_index
} . ' @attr 4=1 @attr 6=3 "a"'
114 croak
"Invalid Zebra index: ", $self->{ zebra_index
} if $@
;
122 # Scan zebra index and populate an array of top terms
125 # $max_terms Max number of top terms
128 # An array of 4-element arrays in $self->{top_terms}
130 # [1] term number of occurrences
131 # [2] term proportional relative weight in terms set, in range [0, 1]
132 # [3] term logarithmic relative weight, in range [0, levels_cloud]
134 # This array is sorted alphabetically by terms ([0])
135 # It can be easily sorted by occurrences:
136 # @t = sort { $a->[1] <=> $b->[1] } @{$self->{top_terms}};
140 my $index_name = $self->{ zebra_index
};
141 my $max_terms = shift;
143 my $MAX_OCCURENCE = 1000000000;
145 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
146 my $number_of_terms = 0;
147 my @terms; # 2 dimensions array
148 my $min_occurence_index = -1;
155 print "$from\n" if $verbose;
156 $from =~ s/\"/\\\"/g;
157 my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
159 $ss = $zbiblio->scan_pqf( $query );
165 $ss->option( rpnCharset
=> 'UTF-8' );
166 last if $ss->size() == 0;
169 for my $index ( 0..$ss->size()-1 ) {
170 ($term, $occ) = $ss->display_term($index);
171 #print "$term:$occ\n";
172 if ( $number_of_terms < $max_terms ) {
173 push( @terms, [ $term, $occ ] );
175 if ( $number_of_terms == $max_terms ) {
176 $min_occurence = $MAX_OCCURENCE;
177 for (0..$number_of_terms-1) {
178 my @term = @
{ $terms[$_] };
179 if ( $term[1] <= $min_occurence ) {
180 $min_occurence = $term[1];
181 $min_occurence_index = $_;
187 if ( $occ > $min_occurence) {
188 @
{ $terms[$min_occurence_index] }[0] = $term;
189 @
{ $terms[$min_occurence_index] }[1] = $occ;
190 $min_occurence = $MAX_OCCURENCE;
191 for (0..$max_terms-1) {
192 my @term = @
{ $terms[$_] };
193 if ( $term[1] <= $min_occurence ) {
194 $min_occurence = $term[1];
195 $min_occurence_index = $_;
204 # Sort array of array by terms weight
205 @terms = sort { @
{$a}[1] <=> @
{$b}[1] } @terms;
207 # A weight relative to the other terms of the set is added to each term
208 my $min = $terms[0][1];
209 my $log_min = log( $min );
210 my $max = $terms[$#terms][1];
211 my $log_max = log( $max );
212 my $delta = $max - $min;
213 $delta = 1 if $delta == 0; # Very unlikely
215 if ($log_max - $log_min == 0) {
216 $log_min = $log_min - $self->{levels_cloud
};
220 $factor = $self->{levels_cloud
} / ($log_max - $log_min);
223 foreach (0..$#terms) {
224 my $count = @
{ $terms[$_] }[1];
225 my $weight = ( $count - $min ) / $delta;
226 my $log_weight = int( (log($count) - $log_min) * $factor);
227 push( @
{ $terms[$_] }, $weight );
228 push( @
{ $terms[$_] }, $log_weight );
230 $self->{ top_terms
} = \
@terms;
232 # Sort array of array by terms alphabetical order
233 @terms = sort { @
{$a}[0] cmp @
{$b}[0] } @terms;
238 # Returns an HTML version of index top terms formatted
243 my $koha_index = shift;
245 my @terms = @
{ $self->{top_terms
} };
258 font-weight: lighter;
259 text-decoration: none;
261 span.tagcloud0 { font-size: 12px;}
262 span.tagcloud1 { font-size: 13px;}
263 span.tagcloud2 { font-size: 14px;}
264 span.tagcloud3 { font-size: 15px;}
265 span.tagcloud4 { font-size: 16px;}
266 span.tagcloud5 { font-size: 17px;}
267 span.tagcloud6 { font-size: 18px;}
268 span.tagcloud7 { font-size: 19px;}
269 span.tagcloud8 { font-size: 20px;}
270 span.tagcloud9 { font-size: 21px;}
271 span.tagcloud10 { font-size: 22px;}
272 span.tagcloud11 { font-size: 23px;}
273 span.tagcloud12 { font-size: 24px;}
274 span.tagcloud13 { font-size: 25px;}
275 span.tagcloud14 { font-size: 26px;}
276 span.tagcloud15 { font-size: 27px;}
277 span.tagcloud16 { font-size: 28px;}
278 span.tagcloud17 { font-size: 29px;}
279 span.tagcloud18 { font-size: 30px;}
280 span.tagcloud19 { font-size: 31px;}
281 span.tagcloud20 { font-size: 32px;}
282 span.tagcloud21 { font-size: 33px;}
283 span.tagcloud22 { font-size: 34px;}
284 span.tagcloud23 { font-size: 35px;}
285 span.tagcloud24 { font-size: 36px;}
287 <div class="subjectcloud">
291 my @term = @
{ $terms[$_] };
294 #print " 0=", $term[0]," - 1=", $term[1], " - 2=", $term[2], " - 3=", $term[3],"\n";
296 . '<span class="tagcloud'
299 . '<a href="/cgi-bin/koha/opac-search.pl?q='
314 cloud-kw.pl - Creates HTML keywords clouds from Koha Zebra Indexes
320 =item cloud-kw.pl [--verbose|--help] --conf=F<cloud.conf>
322 Creates multiple HTML files containing keyword clouds with top terms sorted
323 by their logarithmic weight.
324 F<cloud.conf> is a YAML configuration file driving cloud generation
333 =item B<--conf=configuration file>
335 Specify configuration file name
337 =item B<--verbose|-v>
339 Enable script verbose mode.
343 Print this help page.
349 The configuration file looks like this:
352 # Koha configuration file for a specific installation
353 # If not present, defaults to KOHA_CONF
354 KohaConf: /home/koha/mylibrary/etc/koha-conf.xml
355 # Zebra index to scan
357 # Koha index used to link found keywords with an OPAC search URL
359 # Number of top keywords to use for the cloud
361 # Include CSS style directives with the cloud
362 # This could be used as a model and then CSS directives are
363 # put in the appropriate CSS file directly.
365 # HTML file where to output the cloud
366 Output: /home/koha/mylibrary/koharoot/koha-tmpl/cloud-author.html
368 KohaConf: /home/koha/yourlibrary/etc/koha-conf.xml
373 Output: /home/koha/yourlibrary/koharoot/koha-tmpl/cloud-subject.html
377 Generated top terms carry more information than what is output for
378 the time being. Some parameters could easily be added to improve
385 In order to output terms with the number of occurrences they
386 have been found in Koha Catalogue by Zebra.
390 Number of levels in the cloud. Now 24 levels are hardcoded.
394 Weighting method used to distribute terms in the cloud. We could have two
395 values: Logarithmic and Linear. Now it's Logarithmic by default.
399 Currently terms are output in lexical order. They could be sorted