2 # This file is part of Koha.
4 # Koha is free software; you can redistribute it and/or modify it under the
5 # terms of the GNU General Public License as published by the Free Software
6 # Foundation; either version 2 of the License, or (at your option) any later
9 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
10 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License along with
14 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
15 # Suite 330, Boston, MA 02111-1307 USA
25 use vars
qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
26 use vars qw($ext_dict $select_all @fields);
32 &get_tag &get_tags &get_tag_rows
36 &delete_tag_rows_by_ids
43 &get_count_by_tag_status
47 $ext_dict = C4
::Context
->preference('TagsExternalDictionary');
50 import Data
::Dumper
qw(:DEFAULT);
51 print STDERR __PACKAGE__
. " external dictionary = " . ($ext_dict||'none') . "\n";
54 require Lingua
::Ispell
;
55 import Lingua
::Ispell
qw(spellcheck add_word_lc save_dictionary);
60 $ext_dict and $Lingua::Ispell
::path
= $ext_dict;
61 $debug and print STDERR
"\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
62 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
63 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
66 sub get_filters
(;$) {
67 my $query = "SELECT * FROM tags_filters ";
70 $sth = C4
::Context
->dbh->prepare($query . " WHERE filter_id = ? ");
73 $sth = C4
::Context
->dbh->prepare($query);
76 return $sth->fetchall_arrayref({});
79 # (SELECT count(*) FROM tags_all ) as tags_all,
80 # (SELECT count(*) FROM tags_index ) as tags_index,
82 sub approval_counts
() {
84 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
85 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
86 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
88 my $sth = C4
::Context
->dbh->prepare($query);
90 my $result = $sth->fetchrow_hashref();
91 $result->{approved_total
} = $result->{approved_count
} + $result->{rejected_count
} + $result->{unapproved_count
};
92 $debug and warn "counts returned: " . Dumper
$result;
96 =head2 get_count_by_tag_status
98 get_count_by_tag_status($status);
100 Takes a status and gets a count of tags with that status
104 sub get_count_by_tag_status
{
106 my $dbh = C4
::Context
->dbh;
108 "SELECT count(*) FROM tags_approval WHERE approved=?";
109 my $sth = $dbh->prepare($query);
110 $sth->execute( $status );
111 return $sth->fetchrow;
114 sub remove_tag
($;$) {
115 my $tag_id = shift or return undef;
116 my $user_id = (@_) ?
shift : undef;
117 my $rows = (defined $user_id) ?
118 get_tag_rows
({tag_id
=>$tag_id, borrowernumber
=>$user_id}) :
119 get_tag_rows
({tag_id
=>$tag_id}) ;
121 (scalar(@
$rows) == 1) or return undef; # should never happen (duplicate ids)
122 my $row = shift(@
$rows);
123 ($tag_id == $row->{tag_id
}) or return 0;
124 my $tags = get_tags
({term
=>$row->{term
}, biblionumber
=>$row->{biblionumber
}});
125 my $index = shift(@
$tags);
126 $debug and print STDERR
127 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
128 $row->{tag_id
}, $row->{biblionumber
}, $index->{weight
}, $index->{weight_total
};
129 if ($index->{weight
} <= 1) {
130 delete_tag_index
($row->{term
},$row->{biblionumber
});
132 decrement_weight
($row->{term
},$row->{biblionumber
});
134 if ($index->{weight_total
} <= 1) {
135 delete_tag_approval
($row->{term
});
137 decrement_weight_total
($row->{term
});
139 delete_tag_row_by_id
($tag_id);
142 sub delete_tag_index
($$) {
143 (@_) or return undef;
144 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
146 return $sth->rows || 0;
148 sub delete_tag_approval
($) {
149 (@_) or return undef;
150 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
151 $sth->execute(shift);
152 return $sth->rows || 0;
154 sub delete_tag_row_by_id
($) {
155 (@_) or return undef;
156 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
157 $sth->execute(shift);
158 return $sth->rows || 0;
160 sub delete_tag_rows_by_ids
(@
) {
161 (@_) or return undef;
164 $i += delete_tag_row_by_id
($_);
166 ($i == scalar(@_)) or
167 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
171 sub get_tag_rows
($) {
172 my $hash = shift || {};
173 my @ok_fields = @fields;
174 push @ok_fields, 'limit'; # push the limit! :)
178 foreach my $key (keys %$hash) {
179 $debug and print STDERR
"get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
180 unless (length $key) {
181 carp
"Empty argument key to get_tag_rows: ignoring!";
184 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
185 carp
"get_tag_rows received unreconized argument key '$key'.";
188 if ($key eq 'limit') {
189 my $val = $hash->{$key};
190 unless ($val =~ /^(\d+,)?\d+$/) {
191 carp
"Non-nuerical limit value '$val' ignored!";
194 $limit = " LIMIT $val\n";
196 $wheres .= ($wheres) ?
" AND $key = ?\n" : " WHERE $key = ?\n";
197 push @exe_args, $hash->{$key};
200 my $query = $select_all . ($wheres||'') . $limit;
201 $debug and print STDERR
"get_tag_rows query:\n $query\n",
202 "get_tag_rows query args: ", join(',', @exe_args), "\n";
203 my $sth = C4
::Context
->dbh->prepare($query);
205 $sth->execute(@exe_args);
209 return $sth->fetchall_arrayref({});
212 sub get_tags
(;$) { # i.e., from tags_index
213 my $hash = shift || {};
214 my @ok_fields = qw(term biblionumber weight limit sort approved);
219 foreach my $key (keys %$hash) {
220 $debug and print STDERR
"get_tags arg. '$key' = ", $hash->{$key}, "\n";
221 unless (length $key) {
222 carp
"Empty argument key to get_tags: ignoring!";
225 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
226 carp
"get_tags received unreconized argument key '$key'.";
229 if ($key eq 'limit') {
230 my $val = $hash->{$key};
231 unless ($val =~ /^(\d+,)?\d+$/) {
232 carp
"Non-nuerical limit value '$val' ignored!";
235 $limit = " LIMIT $val\n";
236 } elsif ($key eq 'sort') {
237 foreach my $by (split /\,/, $hash->{$key}) {
239 $by =~ /^([-+])?(term)/ or
240 $by =~ /^([-+])?(biblionumber)/ or
241 $by =~ /^([-+])?(weight)/
243 carp
"get_tags received illegal sort order '$by'";
249 $order = " ORDER BY ";
251 $order .= $2 . " " . ((!$1) ?
'' : $1 eq '-' ?
'DESC' : $1 eq '+' ?
'ASC' : '') . "\n";
255 my $whereval = $hash->{$key};
256 my $longkey = ($key eq 'term' ) ?
'tags_index.term' :
257 ($key eq 'approved') ?
'tags_approval.approved' : $key;
258 my $op = ($whereval =~ s/^(>=|<=)// or
259 $whereval =~ s/^(>|=|<)// ) ?
$1 : '=';
260 $wheres .= ($wheres) ?
" AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
261 push @exe_args, $whereval;
265 SELECT tags_index.term as term,biblionumber,weight,weight_total
267 LEFT JOIN tags_approval
268 ON tags_index.term = tags_approval.term
269 " . ($wheres||'') . $order . $limit;
270 $debug and print STDERR
"get_tags query:\n $query\n",
271 "get_tags query args: ", join(',', @exe_args), "\n";
272 my $sth = C4
::Context
->dbh->prepare($query);
274 $sth->execute(@exe_args);
278 return $sth->fetchall_arrayref({});
281 sub get_approval_rows
(;$) { # i.e., from tags_approval
282 my $hash = shift || {};
283 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
288 foreach my $key (keys %$hash) {
289 $debug and print STDERR
"get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
290 unless (length $key) {
291 carp
"Empty argument key to get_approval_rows: ignoring!";
294 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
295 carp
"get_approval_rows received unreconized argument key '$key'.";
298 if ($key eq 'limit') {
299 my $val = $hash->{$key};
300 unless ($val =~ /^(\d+,)?\d+$/) {
301 carp
"Non-numerical limit value '$val' ignored!";
304 $limit = " LIMIT $val\n";
305 } elsif ($key eq 'sort') {
306 foreach my $by (split /\,/, $hash->{$key}) {
308 $by =~ /^([-+])?(term)/ or
309 $by =~ /^([-+])?(biblionumber)/ or
310 $by =~ /^([-+])?(borrowernumber)/ or
311 $by =~ /^([-+])?(weight_total)/ or
312 $by =~ /^([-+])?(approved(_by)?)/ or
313 $by =~ /^([-+])?(date_approved)/
315 carp
"get_approval_rows received illegal sort order '$by'";
321 $order = " ORDER BY " unless $order;
323 $order .= $2 . " " . ((!$1) ?
'' : $1 eq '-' ?
'DESC' : $1 eq '+' ?
'ASC' : '') . "\n";
327 my $whereval = $hash->{$key};
328 my $op = ($whereval =~ s/^(>=|<=)// or
329 $whereval =~ s/^(>|=|<)// ) ?
$1 : '=';
330 $wheres .= ($wheres) ?
" AND $key $op ?\n" : " WHERE $key $op ?\n";
331 push @exe_args, $whereval;
335 SELECT tags_approval.term AS term,
336 tags_approval.approved AS approved,
337 tags_approval.date_approved AS date_approved,
338 tags_approval.approved_by AS approved_by,
339 tags_approval.weight_total AS weight_total,
340 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
343 ON tags_approval.approved_by = borrowers.borrowernumber ";
344 $query .= ($wheres||'') . $order . $limit;
345 $debug and print STDERR
"get_approval_rows query:\n $query\n",
346 "get_approval_rows query args: ", join(',', @exe_args), "\n";
347 my $sth = C4
::Context
->dbh->prepare($query);
349 $sth->execute(@exe_args);
353 return $sth->fetchall_arrayref({});
356 sub is_approved
($) {
357 my $term = shift or return undef;
358 my $sth = C4
::Context
->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
359 $sth->execute($term);
360 unless ($sth->rows) {
361 $ext_dict and return (spellcheck
($term) ?
0 : 1); # spellcheck returns empty on OK word
364 return $sth->fetchrow;
367 sub get_tag_index
($;$) {
368 my $term = shift or return undef;
371 $sth = C4
::Context
->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
372 $sth->execute($term,shift);
374 $sth = C4
::Context
->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
375 $sth->execute($term);
377 return $sth->fetchrow_hashref;
381 my $operator = shift;
382 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
385 spellcheck
($_) or next;
390 my $aref = get_approval_rows
({term
=>$_});
391 if ($aref and scalar @
$aref) {
392 mod_tag_approval
($operator,$_,1);
394 add_tag_approval
($_,$operator);
399 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
400 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
401 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
403 my $operator = shift;
404 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
406 my $aref = get_approval_rows
({term
=>$_});
407 if ($aref and scalar @
$aref) {
408 mod_tag_approval
($operator,$_,-1);
410 add_tag_approval
($_,$operator,-1);
416 my $operator = shift;
417 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
418 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
419 # my $sth = C4::Context->dbh->prepare($query);
423 my $operator = shift;
424 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
425 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
426 # my $sth = C4::Context->dbh->prepare($query);
427 # $sth->execute($term);
431 sub add_tag_approval
($;$$) { # or disapproval
432 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ?
$_ : 'UNDEF'} @_) . ")";
433 my $term = shift or return undef;
434 my $query = "SELECT * FROM tags_approval WHERE term = ?";
435 my $sth = C4
::Context
->dbh->prepare($query);
436 $sth->execute($term);
437 ($sth->rows) and return increment_weight_total
($term);
438 my $operator = shift || 0;
439 my $approval = (@_ ?
shift : 0); # default is unapproved
440 my @exe_args = ($term); # all 3 queries will use this argument
442 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
443 push @exe_args, $operator, $approval;
444 } elsif ($approval) {
445 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
446 push @exe_args, $approval;
448 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
450 $debug and print STDERR
"add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
451 $sth = C4
::Context
->dbh->prepare($query);
452 $sth->execute(@exe_args);
456 sub mod_tag_approval
($$$) {
457 my $operator = shift;
458 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
459 my $term = shift or return undef;
460 my $approval = (scalar @_ ?
shift : 1); # default is to approve
461 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
462 $debug and print STDERR
"mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
463 my $sth = C4
::Context
->dbh->prepare($query);
464 $sth->execute($operator,$approval,$term);
467 sub add_tag_index
($$;$) {
468 my $term = shift or return undef;
469 my $biblionumber = shift or return undef;
470 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
471 my $sth = C4
::Context
->dbh->prepare($query);
472 $sth->execute($term,$biblionumber);
473 ($sth->rows) and return increment_weight
($term,$biblionumber);
474 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
475 $debug and print STDERR
"add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
476 $sth = C4
::Context
->dbh->prepare($query);
477 $sth->execute($term,$biblionumber);
481 sub get_tag
($) { # by tag_id
482 (@_) or return undef;
483 my $sth = C4
::Context
->dbh->prepare("$select_all WHERE tag_id = ?");
484 $sth->execute(shift);
485 return $sth->fetchrow_hashref;
488 sub rectify_weights
(;$) {
489 my $dbh = C4
::Context
->dbh;
492 SELECT term,biblionumber,count(*) as count
495 (@_) and $query .= " WHERE term =? ";
496 $query .= " GROUP BY term,biblionumber ";
497 $sth = $dbh->prepare($query);
499 $sth->execute(shift);
503 my $results = $sth->fetchall_arrayref({}) or return undef;
505 foreach (@
$results) {
506 _set_weight
($_->{count
},$_->{term
},$_->{biblionumber
});
507 $tally{$_->{term
}} += $_->{count
};
509 foreach (keys %tally) {
510 _set_weight_total
($tally{$_},$_);
512 return ($results,\
%tally);
515 sub increment_weights
($$) {
516 increment_weight
(@_);
517 increment_weight_total
(shift);
519 sub decrement_weights
($$) {
520 decrement_weight
(@_);
521 decrement_weight_total
(shift);
523 sub increment_weight_total
($) {
524 _set_weight_total
('weight_total+1',shift);
526 sub increment_weight
($$) {
527 _set_weight
('weight+1',shift,shift);
529 sub decrement_weight_total
($) {
530 _set_weight_total
('weight_total-1',shift);
532 sub decrement_weight
($$) {
533 _set_weight
('weight-1',shift,shift);
535 sub _set_weight_total
($$) {
536 my $sth = C4
::Context
->dbh->prepare("
538 SET weight_total=" . (shift) . "
540 "); # note: CANNOT use "?" for weight_total (see the args above).
541 $sth->execute(shift); # just the term
543 sub _set_weight
($$$) {
544 my $dbh = C4
::Context
->dbh;
545 my $sth = $dbh->prepare("
547 SET weight=" . (shift) . "
554 sub add_tag
($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
555 my $biblionumber = shift or return undef;
556 my $term = shift or return undef;
557 my $borrowernumber = (@_) ?
shift : 0; # the user, default to kohaadmin
560 ($term) or return undef; # must be more than whitespace
561 my $rows = get_tag_rows
({biblionumber
=>$biblionumber, borrowernumber
=>$borrowernumber, term
=>$term, limit
=>1});
562 my $query = "INSERT INTO tags_all
563 (borrowernumber,biblionumber,term,date_created)
564 VALUES (?,?,?,NOW())";
565 $debug and print STDERR
"add_tag query: $query\n",
566 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
568 $debug and carp
"Duplicate tag detected. Tag not added.";
571 # add to tags_all regardless of approaval
572 my $sth = C4
::Context
->dbh->prepare($query);
573 $sth->execute($borrowernumber,$biblionumber,$term);
576 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
577 my $approver = shift;
578 $debug and print STDERR
"term '$term' pre-approved by borrower #$approver\n";
579 add_tag_approval
($term,$approver,1);
580 add_tag_index
($term,$biblionumber,$approver);
581 } elsif (is_approved
($term) >= 1) {
582 $debug and print STDERR
"term '$term' approved by whitelist\n";
583 add_tag_approval
($term,0,1);
584 add_tag_index
($term,$biblionumber,1);
586 $debug and print STDERR
"term '$term' NOT approved (yet)\n";
587 add_tag_approval
($term);
588 add_tag_index
($term,$biblionumber);
595 =head1 C4::Tags.pm - Support for user tagging of biblios.
597 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
599 =head2 add_tag(biblionumber,term[,borrowernumber])
601 =head3 TO DO: Add real perldoc
605 =head2 External Dictionary (Ispell) [Recommended]
607 An external dictionary can be used as a means of "pre-populating" and tracking
608 allowed terms based on the widely available Ispell dictionary. This can be the system
609 dictionary or a personal version, but in order to support whitelisting, it must be
610 editable to the process running Koha.
612 To enable, enter the absolute path to the ispell dictionary in the system
613 preference "TagsExternalDictionary".
615 Using external Ispell is recommended for both ease of use and performance. Note that any
616 language version of Ispell can be installed. It is also possible to modify the dictionary
617 at the command line to affect the desired content.
619 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
620 should build their own wordlist and recompile Ispell based on it. See man ispell for
623 =head2 Table Structure
625 The tables used by tags are:
631 Your first thought may be that this looks a little complicated. It is, but only because
632 it has to be. I'll try to explain.
634 tags_all - This table would be all we really need if we didn't care about moderation or
635 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
636 though, it contains all the relevant info about a given tag:
637 tag_id - unique id number for it
638 borrowernumber - user that entered it
639 biblionumber - book record it is attached to
640 term - tag "term" itself
641 language - perhaps used later to influence weighting
642 date_created - date and time it was created
644 tags_approval - Since we need to provide moderation, this table is used to track it. If no
645 external dictionary is used, this table is the sole reference for approval and rejection.
646 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
647 This could be called an "approved terms" table. See above regarding the External Dictionary.
648 term - tag "term" itself
649 approved - Negative, 0 or positive if tag is rejected, pending or approved.
650 date_approved - date of last action
651 approved_by - staffer performing the last action
652 weight_total - total occurance of term in any biblio by any users
654 tags_index - This table is for performance, because by far the most common operation will
655 be fetching tags for a list of search results. We will have a set of biblios, and we will
656 want ONLY their approved tags and overall weighting. While we could implement a query that
657 would traverse tags_all filtered against tags_approval, the performance implications of
658 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
659 term - approved term as it appears in tags_approval
660 biblionumber - book record it is attached to
661 weight - number of times tag applied by any user
663 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
664 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
665 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
666 if you do not understand their operation and interaction. It is quite easy to define too
667 simple or too complex a regexp and effectively block all terms. The blacklist operation is
668 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
669 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
670 understanding of regular expression syntax and performance.
672 So the best way to think about the different tables is that they are each tailored to a certain
673 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
674 the tag population can continue to grow even if a user (along with their corresponding
675 rows in tags_all) is removed.
679 If you want to auto-populate some tags for debugging, do something like this:
681 mysql> select biblionumber from biblio where title LIKE "%Health%";
712 26 rows in set (0.00 sec)
714 Then, take those numbers and type/pipe them into this perl command line:
715 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
717 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.