4 # Copyright 2008 Tamil s.a.r.l.
6 # This software is placed under the gnu General Public License, v2
7 # (http://www.gnu.org/licenses/gpl.html)
24 'verbose' => \
$verbose,
30 pod2usage
( -verbose
=> 2 );
34 usage
() if $help || !$conf;
38 print "Reading configuration file: $conf\n" if $verbose;
40 @clouds = LoadFile
( $conf );
42 croak
"Unable to read configuration file: $conf\n" if $@
;
44 for my $cloud ( @clouds ) {
45 print "Create a cloud\n",
46 " Koha conf file: ", $cloud->{KohaConf
} ?
$cloud->{KohaConf
} : "default", "\n",
47 " Zebra Index: ", $cloud->{ZebraIndex
}, "\n",
48 " Koha Keyword: ", $cloud->{KohaIndex
}, "\n",
49 " Count: ", $cloud->{Count
}, "\n",
50 " Withcss: ", $cloud->{Withcss
}, "\n",
51 " Output: ", $cloud->{Output
}, "\n",
54 # Set Koha context if KohaConf is present
55 my $set_new_context = 0;
56 if ( $cloud->{KohaConf
} ) {
57 if ( -e
$cloud->{KohaConf
} ) {
58 my $context = C4
::Context
->new( $cloud->{KohaConf
} );
59 $context->set_context();
63 carp
"Koha conf file doesn't exist: ", $cloud->{KohaConf
}, " ; use KOHA_CONF\n";
67 my $index = new ZebraIndex
( $cloud->{ZebraIndex
} );
68 $index->scan( $cloud->{Count
} );
70 open my $fh, ">", $cloud->{Output
}
71 or croak
"Unable to create file ", $cloud->{Output
};
73 my $withcss = $cloud->{Withcss
} =~ /^yes/i;
74 print $fh $index->html_cloud( $cloud->{KohaIndex
}, $withcss );
76 $set_new_context && restore_context C4
::Context
;
91 $self->{ zebra_index
} = shift;
92 $self->{ top_terms
} = undef;
93 $self->{ levels_cloud
} = 24;
97 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
99 my $ss = $zbiblio->scan_pqf(
100 '@attr 1=' . $self->{ zebra_index
} . ' @attr 4=1 @attr 6=3 "a"'
103 croak
"Invalid Zebra index: ", $self->{ zebra_index
} if $@
;
111 # Scan zebra index and populate an array of top terms
114 # $max_terms Max number of top terms
117 # An array of 4-element arrays (one per term) in $self->{top_terms}
119 # [1] term number of occurrences
120 # [2] term proportional relative weight in terms set E[0-1]
121 # [3] term logarithmic relative weight E [0-levels_cloud]
123 # This array is sorted alphabetically by terms ([0])
124 # It can be easily sorted by occurrences:
125 # @t = sort { $a->[1] <=> $b->[1] } @{$self->{top_terms}};
129 my $index_name = $self->{ zebra_index
};
130 my $max_terms = shift;
132 my $MAX_OCCURENCE = 1000000000;
134 my $zbiblio = C4
::Context
->Zconn( "biblioserver" );
135 my $number_of_terms = 0;
136 my @terms; # 2 dimensions array
137 my $min_occurence_index = -1;
144 print "$from\n" if $verbose;
145 $from =~ s/\"/\\\"/g;
146 my $query = '@attr 1=' . $index_name . ' @attr 4=1 @attr 6=3 "'
148 $ss = $zbiblio->scan_pqf( $query );
154 $ss->option( rpnCharset
=> 'UTF-8' );
155 last if $ss->size() == 0;
158 for my $index ( 0..$ss->size()-1 ) {
159 ($term, $occ) = $ss->display_term($index);
160 #print "$term:$occ\n";
161 if ( $number_of_terms < $max_terms ) {
162 push( @terms, [ $term, $occ ] );
164 if ( $number_of_terms == $max_terms ) {
165 $min_occurence = $MAX_OCCURENCE;
166 for (0..$number_of_terms-1) {
167 my @term = @
{ $terms[$_] };
168 if ( $term[1] <= $min_occurence ) {
169 $min_occurence = $term[1];
170 $min_occurence_index = $_;
176 if ( $occ > $min_occurence) {
177 @
{ $terms[$min_occurence_index] }[0] = $term;
178 @
{ $terms[$min_occurence_index] }[1] = $occ;
179 $min_occurence = $MAX_OCCURENCE;
180 for (0..$max_terms-1) {
181 my @term = @
{ $terms[$_] };
182 if ( $term[1] <= $min_occurence ) {
183 $min_occurence = $term[1];
184 $min_occurence_index = $_;
193 # Sort array of array by terms weight
194 @terms = sort { @
{$a}[1] <=> @
{$b}[1] } @terms;
196 # A weight relative to the other terms of the set is added to each term
197 my $min = $terms[0][1];
198 my $log_min = log( $min );
199 my $max = $terms[$#terms][1];
200 my $log_max = log( $max );
201 my $delta = $max - $min;
202 $delta = 1 if $delta == 0; # Very unlikely
204 if ($log_max - $log_min == 0) {
205 $log_min = $log_min - $self->{levels_cloud
};
209 $factor = $self->{levels_cloud
} / ($log_max - $log_min);
212 foreach (0..$#terms) {
213 my $count = @
{ $terms[$_] }[1];
214 my $weight = ( $count - $min ) / $delta;
215 my $log_weight = int( (log($count) - $log_min) * $factor);
216 push( @
{ $terms[$_] }, $weight );
217 push( @
{ $terms[$_] }, $log_weight );
219 $self->{ top_terms
} = \
@terms;
221 # Sort array of array by terms alphabetical order
222 @terms = sort { @
{$a}[0] cmp @
{$b}[0] } @terms;
227 # Returns an HTML version of index top terms formatted
232 my $koha_index = shift;
234 my @terms = @
{ $self->{top_terms
} };
247 font-weight: lighter;
248 text-decoration: none;
250 span.tagcloud0 { font-size: 12px;}
251 span.tagcloud1 { font-size: 13px;}
252 span.tagcloud2 { font-size: 14px;}
253 span.tagcloud3 { font-size: 15px;}
254 span.tagcloud4 { font-size: 16px;}
255 span.tagcloud5 { font-size: 17px;}
256 span.tagcloud6 { font-size: 18px;}
257 span.tagcloud7 { font-size: 19px;}
258 span.tagcloud8 { font-size: 20px;}
259 span.tagcloud9 { font-size: 21px;}
260 span.tagcloud10 { font-size: 22px;}
261 span.tagcloud11 { font-size: 23px;}
262 span.tagcloud12 { font-size: 24px;}
263 span.tagcloud13 { font-size: 25px;}
264 span.tagcloud14 { font-size: 26px;}
265 span.tagcloud15 { font-size: 27px;}
266 span.tagcloud16 { font-size: 28px;}
267 span.tagcloud17 { font-size: 29px;}
268 span.tagcloud18 { font-size: 30px;}
269 span.tagcloud19 { font-size: 31px;}
270 span.tagcloud20 { font-size: 32px;}
271 span.tagcloud21 { font-size: 33px;}
272 span.tagcloud22 { font-size: 34px;}
273 span.tagcloud23 { font-size: 35px;}
274 span.tagcloud24 { font-size: 36px;}
276 <div class="subjectcloud">
280 my @term = @
{ $terms[$_] };
283 #print " 0=", $term[0]," - 1=", $term[1], " - 2=", $term[2], " - 3=", $term[3],"\n";
285 . '<span class="tagcloud'
288 . '<a href="/cgi-bin/koha/opac-search.pl?q='
303 cloud-kw.pl - Creates HTML keywords clouds from Koha Zebra Indexes
309 =item cloud-kw.pl [--verbose|--help] --conf=F<cloud.conf>
311 Creates multiple HTML files, each containing a keyword cloud with top terms sorted
312 by their logarithmic weight.
313 F<cloud.conf> is a YAML configuration file driving cloud generation
322 =item B<--conf=configuration file>
324 Specify configuration file name
326 =item B<--verbose|-v>
328 Enable script verbose mode.
332 Print this help page.
338 The configuration file looks like this:
341 # Koha configuration file for a specific installation
342 # If not present, defaults to KOHA_CONF
343 KohaConf: /home/koha/mylibrary/etc/koha-conf.xml
344 # Zebra index to scan
346 # Koha index used to link found keywords with an opac search URL
348 # Number of top keywords to use for the cloud
350 # Include CSS style directives with the cloud
351 # This could be used as a model and then CSS directives are
352 # put in the appropriate CSS file directly.
354 # HTML file where to output the cloud
355 Output: /home/koha/mylibrary/koharoot/koha-tmpl/cloud-author.html
357 KohaConf: /home/koha/yourlibrary/etc/koha-conf.xml
362 Output: /home/koha/yourlibrary/koharoot/koha-tmpl/cloud-subject.html
366 Generated top terms carry more information than is output for
367 the time being. Some parameters could easily be added to improve
374 Output terms together with the number of occurrences
375 found for them in the Koha Catalogue by Zebra.
379 Number of levels in the cloud. Now 24 levels are hardcoded.
383 Weighting method used to distribute terms in the cloud. We could have two
384 values: Logarithmic and Linear. Now it's Logarithmic by default.
388 Currently terms are output in lexical order. They could be sorted