2 # This file is part of Koha.
4 # Koha is free software; you can redistribute it and/or modify it under the
5 # terms of the GNU General Public License as published by the Free Software
6 # Foundation; either version 2 of the License, or (at your option) any later
9 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
10 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License along with
14 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
15 # Suite 330, Boston, MA 02111-1307 USA
25 use vars
qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
26 use vars qw($ext_dict $select_all @fields);
32 &get_tag &get_tags &get_tag_rows
36 &delete_tag_rows_by_ids
46 $ext_dict = C4
::Context
->preference('TagsExternalDictionary');
49 import Data
::Dumper
qw(:DEFAULT);
50 print STDERR __PACKAGE__
. " external dictionary = " . ($ext_dict||'none') . "\n";
53 require Lingua
::Ispell
;
54 import Lingua
::Ispell
qw(spellcheck add_word_lc save_dictionary);
59 $ext_dict and $Lingua::Ispell
::path
= $ext_dict;
60 $debug and print STDERR
"\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
61 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
62 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
65 sub get_filters
(;$) {
66 my $query = "SELECT * FROM tags_filters ";
69 $sth = C4
::Context
->dbh->prepare($query . " WHERE filter_id = ? ");
72 $sth = C4
::Context
->dbh->prepare($query);
75 return $sth->fetchall_arrayref({});
78 # (SELECT count(*) FROM tags_all ) as tags_all,
79 # (SELECT count(*) FROM tags_index ) as tags_index,
81 sub approval_counts
() {
83 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
84 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
85 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
87 my $sth = C4
::Context
->dbh->prepare($query);
89 my $result = $sth->fetchrow_hashref();
90 $result->{approved_total
} = $result->{approved_count
} + $result->{rejected_count
} + $result->{unapproved_count
};
91 $debug and warn "counts returned: " . Dumper
$result;
95 sub remove_tag
($;$) {
96 my $tag_id = shift or return undef;
97 my $user_id = (@_) ?
shift : undef;
98 my $rows = (defined $user_id) ?
99 get_tag_rows
({tag_id
=>$tag_id, borrowernumber
=>$user_id}) :
100 get_tag_rows
({tag_id
=>$tag_id}) ;
102 (scalar(@
$rows) == 1) or return undef; # should never happen (duplicate ids)
103 my $row = shift(@
$rows);
104 ($tag_id == $row->{tag_id
}) or return 0;
105 my $tags = get_tags
({term
=>$row->{term
}, biblionumber
=>$row->{biblionumber
}});
106 my $index = shift(@
$tags);
107 $debug and print STDERR
108 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
109 $row->{tag_id
}, $row->{biblionumber
}, $index->{weight
}, $index->{weight_total
};
110 if ($index->{weight
} <= 1) {
111 delete_tag_index
($row->{term
},$row->{biblionumber
});
113 decrement_weight
($row->{term
},$row->{biblionumber
});
115 if ($index->{weight_total
} <= 1) {
116 delete_tag_approval
($row->{term
});
118 decrement_weight_total
($row->{term
});
120 delete_tag_row_by_id
($tag_id);
123 sub delete_tag_index
($$) {
124 (@_) or return undef;
125 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
127 return $sth->rows || 0;
129 sub delete_tag_approval
($) {
130 (@_) or return undef;
131 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
132 $sth->execute(shift);
133 return $sth->rows || 0;
135 sub delete_tag_row_by_id
($) {
136 (@_) or return undef;
137 my $sth = C4
::Context
->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
138 $sth->execute(shift);
139 return $sth->rows || 0;
141 sub delete_tag_rows_by_ids
(@
) {
142 (@_) or return undef;
145 $i += delete_tag_row_by_id
($_);
147 ($i == scalar(@_)) or
148 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
152 sub get_tag_rows
($) {
153 my $hash = shift || {};
154 my @ok_fields = @fields;
155 push @ok_fields, 'limit'; # push the limit! :)
159 foreach my $key (keys %$hash) {
160 $debug and print STDERR
"get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
161 unless (length $key) {
162 carp
"Empty argument key to get_tag_rows: ignoring!";
165 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
166 carp
"get_tag_rows received unreconized argument key '$key'.";
169 if ($key eq 'limit') {
170 my $val = $hash->{$key};
171 unless ($val =~ /^(\d+,)?\d+$/) {
172 carp
"Non-nuerical limit value '$val' ignored!";
175 $limit = " LIMIT $val\n";
177 $wheres .= ($wheres) ?
" AND $key = ?\n" : " WHERE $key = ?\n";
178 push @exe_args, $hash->{$key};
181 my $query = $select_all . ($wheres||'') . $limit;
182 $debug and print STDERR
"get_tag_rows query:\n $query\n",
183 "get_tag_rows query args: ", join(',', @exe_args), "\n";
184 my $sth = C4
::Context
->dbh->prepare($query);
186 $sth->execute(@exe_args);
190 return $sth->fetchall_arrayref({});
193 sub get_tags
(;$) { # i.e., from tags_index
194 my $hash = shift || {};
195 my @ok_fields = qw(term biblionumber weight limit sort approved);
200 foreach my $key (keys %$hash) {
201 $debug and print STDERR
"get_tags arg. '$key' = ", $hash->{$key}, "\n";
202 unless (length $key) {
203 carp
"Empty argument key to get_tags: ignoring!";
206 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
207 carp
"get_tags received unreconized argument key '$key'.";
210 if ($key eq 'limit') {
211 my $val = $hash->{$key};
212 unless ($val =~ /^(\d+,)?\d+$/) {
213 carp
"Non-nuerical limit value '$val' ignored!";
216 $limit = " LIMIT $val\n";
217 } elsif ($key eq 'sort') {
218 foreach my $by (split /\,/, $hash->{$key}) {
220 $by =~ /^([-+])?(term)/ or
221 $by =~ /^([-+])?(biblionumber)/ or
222 $by =~ /^([-+])?(weight)/
224 carp
"get_tags received illegal sort order '$by'";
230 $order = " ORDER BY ";
232 $order .= $2 . " " . ((!$1) ?
'' : $1 eq '-' ?
'DESC' : $1 eq '+' ?
'ASC' : '') . "\n";
236 my $whereval = $hash->{$key};
237 my $longkey = ($key eq 'term' ) ?
'tags_index.term' :
238 ($key eq 'approved') ?
'tags_approval.approved' : $key;
239 my $op = ($whereval =~ s/^(>=|<=)// or
240 $whereval =~ s/^(>|=|<)// ) ?
$1 : '=';
241 $wheres .= ($wheres) ?
" AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
242 push @exe_args, $whereval;
246 SELECT tags_index.term as term,biblionumber,weight,weight_total
248 LEFT JOIN tags_approval
249 ON tags_index.term = tags_approval.term
250 " . ($wheres||'') . $order . $limit;
251 $debug and print STDERR
"get_tags query:\n $query\n",
252 "get_tags query args: ", join(',', @exe_args), "\n";
253 my $sth = C4
::Context
->dbh->prepare($query);
255 $sth->execute(@exe_args);
259 return $sth->fetchall_arrayref({});
262 sub get_approval_rows
(;$) { # i.e., from tags_approval
263 my $hash = shift || {};
264 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
269 foreach my $key (keys %$hash) {
270 $debug and print STDERR
"get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
271 unless (length $key) {
272 carp
"Empty argument key to get_approval_rows: ignoring!";
275 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
276 carp
"get_approval_rows received unreconized argument key '$key'.";
279 if ($key eq 'limit') {
280 my $val = $hash->{$key};
281 unless ($val =~ /^(\d+,)?\d+$/) {
282 carp
"Non-numerical limit value '$val' ignored!";
285 $limit = " LIMIT $val\n";
286 } elsif ($key eq 'sort') {
287 foreach my $by (split /\,/, $hash->{$key}) {
289 $by =~ /^([-+])?(term)/ or
290 $by =~ /^([-+])?(biblionumber)/ or
291 $by =~ /^([-+])?(borrowernumber)/ or
292 $by =~ /^([-+])?(weight_total)/ or
293 $by =~ /^([-+])?(approved(_by)?)/ or
294 $by =~ /^([-+])?(date_approved)/
296 carp
"get_approval_rows received illegal sort order '$by'";
302 $order = " ORDER BY " unless $order;
304 $order .= $2 . " " . ((!$1) ?
'' : $1 eq '-' ?
'DESC' : $1 eq '+' ?
'ASC' : '') . "\n";
308 my $whereval = $hash->{$key};
309 my $op = ($whereval =~ s/^(>=|<=)// or
310 $whereval =~ s/^(>|=|<)// ) ?
$1 : '=';
311 $wheres .= ($wheres) ?
" AND $key $op ?\n" : " WHERE $key $op ?\n";
312 push @exe_args, $whereval;
316 SELECT tags_approval.term AS term,
317 tags_approval.approved AS approved,
318 tags_approval.date_approved AS date_approved,
319 tags_approval.approved_by AS approved_by,
320 tags_approval.weight_total AS weight_total,
321 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
324 ON tags_approval.approved_by = borrowers.borrowernumber ";
325 $query .= ($wheres||'') . $order . $limit;
326 $debug and print STDERR
"get_approval_rows query:\n $query\n",
327 "get_approval_rows query args: ", join(',', @exe_args), "\n";
328 my $sth = C4
::Context
->dbh->prepare($query);
330 $sth->execute(@exe_args);
334 return $sth->fetchall_arrayref({});
337 sub is_approved
($) {
338 my $term = shift or return undef;
339 my $sth = C4
::Context
->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
340 $sth->execute($term);
341 unless ($sth->rows) {
342 $ext_dict and return (spellcheck
($term) ?
0 : 1); # spellcheck returns empty on OK word
345 return $sth->fetchrow;
348 sub get_tag_index
($;$) {
349 my $term = shift or return undef;
352 $sth = C4
::Context
->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
353 $sth->execute($term,shift);
355 $sth = C4
::Context
->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
356 $sth->execute($term);
358 return $sth->fetchrow_hashref;
362 my $operator = shift;
363 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
366 spellcheck
($_) or next;
371 my $aref = get_approval_rows
({term
=>$_});
372 if ($aref and scalar @
$aref) {
373 mod_tag_approval
($operator,$_,1);
375 add_tag_approval
($_,$operator);
380 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
381 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
382 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
384 my $operator = shift;
385 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
387 my $aref = get_approval_rows
({term
=>$_});
388 if ($aref and scalar @
$aref) {
389 mod_tag_approval
($operator,$_,-1);
391 add_tag_approval
($_,$operator,-1);
397 my $operator = shift;
398 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
399 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
400 # my $sth = C4::Context->dbh->prepare($query);
404 my $operator = shift;
405 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
406 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
407 # my $sth = C4::Context->dbh->prepare($query);
408 # $sth->execute($term);
412 sub add_tag_approval
($;$$) { # or disapproval
413 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ?
$_ : 'UNDEF'} @_) . ")";
414 my $term = shift or return undef;
415 my $query = "SELECT * FROM tags_approval WHERE term = ?";
416 my $sth = C4
::Context
->dbh->prepare($query);
417 $sth->execute($term);
418 ($sth->rows) and return increment_weight_total
($term);
419 my $operator = shift || 0;
420 my $approval = (@_ ?
shift : 0); # default is unapproved
421 my @exe_args = ($term); # all 3 queries will use this argument
423 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
424 push @exe_args, $operator, $approval;
425 } elsif ($approval) {
426 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
427 push @exe_args, $approval;
429 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
431 $debug and print STDERR
"add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
432 $sth = C4
::Context
->dbh->prepare($query);
433 $sth->execute(@exe_args);
437 sub mod_tag_approval
($$$) {
438 my $operator = shift;
439 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
440 my $term = shift or return undef;
441 my $approval = (scalar @_ ?
shift : 1); # default is to approve
442 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
443 $debug and print STDERR
"mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
444 my $sth = C4
::Context
->dbh->prepare($query);
445 $sth->execute($operator,$approval,$term);
448 sub add_tag_index
($$;$) {
449 my $term = shift or return undef;
450 my $biblionumber = shift or return undef;
451 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
452 my $sth = C4
::Context
->dbh->prepare($query);
453 $sth->execute($term,$biblionumber);
454 ($sth->rows) and return increment_weight
($term,$biblionumber);
455 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
456 $debug and print STDERR
"add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
457 $sth = C4
::Context
->dbh->prepare($query);
458 $sth->execute($term,$biblionumber);
462 sub get_tag
($) { # by tag_id
463 (@_) or return undef;
464 my $sth = C4
::Context
->dbh->prepare("$select_all WHERE tag_id = ?");
465 $sth->execute(shift);
466 return $sth->fetchrow_hashref;
469 sub rectify_weights
(;$) {
470 my $dbh = C4
::Context
->dbh;
473 SELECT term,biblionumber,count(*) as count
476 (@_) and $query .= " WHERE term =? ";
477 $query .= " GROUP BY term,biblionumber ";
478 $sth = $dbh->prepare($query);
480 $sth->execute(shift);
484 my $results = $sth->fetchall_arrayref({}) or return undef;
486 foreach (@
$results) {
487 _set_weight
($_->{count
},$_->{term
},$_->{biblionumber
});
488 $tally{$_->{term
}} += $_->{count
};
490 foreach (keys %tally) {
491 _set_weight_total
($tally{$_},$_);
493 return ($results,\
%tally);
496 sub increment_weights
($$) {
497 increment_weight
(@_);
498 increment_weight_total
(shift);
500 sub decrement_weights
($$) {
501 decrement_weight
(@_);
502 decrement_weight_total
(shift);
504 sub increment_weight_total
($) {
505 _set_weight_total
('weight_total+1',shift);
507 sub increment_weight
($$) {
508 _set_weight
('weight+1',shift,shift);
510 sub decrement_weight_total
($) {
511 _set_weight_total
('weight_total-1',shift);
513 sub decrement_weight
($$) {
514 _set_weight
('weight-1',shift,shift);
516 sub _set_weight_total
($$) {
517 my $sth = C4
::Context
->dbh->prepare("
519 SET weight_total=" . (shift) . "
521 "); # note: CANNOT use "?" for weight_total (see the args above).
522 $sth->execute(shift); # just the term
524 sub _set_weight
($$$) {
525 my $dbh = C4
::Context
->dbh;
526 my $sth = $dbh->prepare("
528 SET weight=" . (shift) . "
535 sub add_tag
($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
536 my $biblionumber = shift or return undef;
537 my $term = shift or return undef;
538 my $borrowernumber = (@_) ?
shift : 0; # the user, default to kohaadmin
541 ($term) or return undef; # must be more than whitespace
542 my $rows = get_tag_rows
({biblionumber
=>$biblionumber, borrowernumber
=>$borrowernumber, term
=>$term, limit
=>1});
543 my $query = "INSERT INTO tags_all
544 (borrowernumber,biblionumber,term,date_created)
545 VALUES (?,?,?,NOW())";
546 $debug and print STDERR
"add_tag query: $query\n",
547 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
549 $debug and carp
"Duplicate tag detected. Tag not added.";
552 # add to tags_all regardless of approaval
553 my $sth = C4
::Context
->dbh->prepare($query);
554 $sth->execute($borrowernumber,$biblionumber,$term);
557 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
558 my $approver = shift;
559 $debug and print STDERR
"term '$term' pre-approved by borrower #$approver\n";
560 add_tag_approval
($term,$approver,1);
561 add_tag_index
($term,$biblionumber,$approver);
562 } elsif (is_approved
($term) >= 1) {
563 $debug and print STDERR
"term '$term' approved by whitelist\n";
564 add_tag_approval
($term,0,1);
565 add_tag_index
($term,$biblionumber,1);
567 $debug and print STDERR
"term '$term' NOT approved (yet)\n";
568 add_tag_approval
($term);
569 add_tag_index
($term,$biblionumber);
576 =head1 C4::Tags.pm - Support for user tagging of biblios.
578 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
580 =head2 add_tag(biblionumber,term[,borrowernumber])
582 =head3 TO DO: Add real perldoc
586 =head2 External Dictionary (Ispell) [Recommended]
588 An external dictionary can be used as a means of "pre-populating" and tracking
589 allowed terms based on the widely available Ispell dictionary. This can be the system
590 dictionary or a personal version, but in order to support whitelisting, it must be
591 editable to the process running Koha.
593 To enable, enter the absolute path to the ispell dictionary in the system
594 preference "TagsExternalDictionary".
596 Using external Ispell is recommended for both ease of use and performance. Note that any
597 language version of Ispell can be installed. It is also possible to modify the dictionary
598 at the command line to affect the desired content.
600 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
601 should build their own wordlist and recompile Ispell based on it. See man ispell for
604 =head2 Table Structure
606 The tables used by tags are:
612 Your first thought may be that this looks a little complicated. It is, but only because
613 it has to be. I'll try to explain.
615 tags_all - This table would be all we really need if we didn't care about moderation or
616 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
617 though, it contains all the relevant info about a given tag:
618 tag_id - unique id number for it
619 borrowernumber - user that entered it
620 biblionumber - book record it is attached to
621 term - tag "term" itself
622 language - perhaps used later to influence weighting
623 date_created - date and time it was created
625 tags_approval - Since we need to provide moderation, this table is used to track it. If no
626 external dictionary is used, this table is the sole reference for approval and rejection.
627 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
628 This could be called an "approved terms" table. See above regarding the External Dictionary.
629 term - tag "term" itself
630 approved - Negative, 0 or positive if tag is rejected, pending or approved.
631 date_approved - date of last action
632 approved_by - staffer performing the last action
633 weight_total - total occurance of term in any biblio by any users
635 tags_index - This table is for performance, because by far the most common operation will
636 be fetching tags for a list of search results. We will have a set of biblios, and we will
637 want ONLY their approved tags and overall weighting. While we could implement a query that
638 would traverse tags_all filtered against tags_approval, the performance implications of
639 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
640 term - approved term as it appears in tags_approval
641 biblionumber - book record it is attached to
642 weight - number of times tag applied by any user
644 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
645 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
646 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
647 if you do not understand their operation and interaction. It is quite easy to define too
648 simple or too complex a regexp and effectively block all terms. The blacklist operation is
649 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
650 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
651 understanding of regular expression syntax and performance.
653 So the best way to think about the different tables is that they are each tailored to a certain
654 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
655 the tag population can continue to grow even if a user (along with their corresponding
656 rows in tags_all) is removed.
660 If you want to auto-populate some tags for debugging, do something like this:
662 mysql> select biblionumber from biblio where title LIKE "%Health%";
693 26 rows in set (0.00 sec)
695 Then, take those numbers and type/pipe them into this perl command line:
696 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
698 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.