Bug 19893: (QA follow-up) Spelling correction in POD
[koha.git] / C4 / Tags.pm
blob1db7accc06bed815795019c63357883edec423ee
1 package C4::Tags;
3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
21 use strict;
22 use warnings;
23 use Carp;
24 use Exporter;
26 use C4::Context;
27 use C4::Debug;
28 use Module::Load::Conditional qw/check_install/;
29 #use Data::Dumper;
30 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
31 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
33 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
35 BEGIN {
36 @ISA = qw(Exporter);
37 @EXPORT_OK = qw(
38 &get_tag &get_tags &get_tag_rows
39 &add_tags &add_tag
40 &delete_tag_row_by_id
41 &remove_tag
42 &delete_tag_rows_by_ids
43 &get_approval_rows
44 &blacklist
45 &whitelist
46 &is_approved
47 &approval_counts
48 &get_count_by_tag_status
49 &get_filters
50 stratify_tags
52 # %EXPORT_TAGS = ();
53 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
54 if ( $ext_dict && ! check_install( module => 'Lingua::Ispell' ) ) {
55 warn "Ignoring TagsExternalDictionary, because Lingua::Ispell is not installed.";
56 $ext_dict = q{};
58 if ($debug) {
59 require Data::Dumper;
60 import Data::Dumper qw(:DEFAULT);
61 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
63 if ($ext_dict) {
64 require Lingua::Ispell;
65 import Lingua::Ispell qw(spellcheck add_word_lc);
66 $Lingua::Ispell::path = $ext_dict;
67 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
71 =head1 C4::Tags.pm - Support for user tagging of biblios.
73 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
75 =cut
77 sub get_filters {
78 my $query = "SELECT * FROM tags_filters ";
79 my ($sth);
80 if (@_) {
81 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
82 $sth->execute(shift);
83 } else {
84 $sth = C4::Context->dbh->prepare($query);
85 $sth->execute;
87 return $sth->fetchall_arrayref({});
90 # (SELECT count(*) FROM tags_all ) as tags_all,
91 # (SELECT count(*) FROM tags_index ) as tags_index,
93 sub approval_counts {
94 my $query = "SELECT
95 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
96 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
97 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
99 my $sth = C4::Context->dbh->prepare($query);
100 $sth->execute;
101 my $result = $sth->fetchrow_hashref();
102 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
103 $debug and warn "counts returned: " . Dumper $result;
104 return $result;
107 =head2 get_count_by_tag_status
109 get_count_by_tag_status($status);
111 Takes a status and gets a count of tags with that status
113 =cut
115 sub get_count_by_tag_status {
116 my ($status) = @_;
117 my $dbh = C4::Context->dbh;
118 my $query =
119 "SELECT count(*) FROM tags_approval WHERE approved=?";
120 my $sth = $dbh->prepare($query);
121 $sth->execute( $status );
122 return $sth->fetchrow;
125 sub remove_tag {
126 my $tag_id = shift or return;
127 my $user_id = (@_) ? shift : undef;
128 my $rows = (defined $user_id) ?
129 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
130 get_tag_rows({tag_id=>$tag_id}) ;
131 $rows or return 0;
132 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
133 my $row = shift(@$rows);
134 ($tag_id == $row->{tag_id}) or return 0;
135 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
136 my $index = shift(@$tags);
137 $debug and print STDERR
138 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
139 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
140 if ($index->{weight} <= 1) {
141 delete_tag_index($row->{term},$row->{biblionumber});
142 } else {
143 decrement_weight($row->{term},$row->{biblionumber});
145 if ($index->{weight_total} <= 1) {
146 delete_tag_approval($row->{term});
147 } else {
148 decrement_weight_total($row->{term});
150 delete_tag_row_by_id($tag_id);
153 sub delete_tag_index {
154 (@_) or return;
155 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
156 $sth->execute(@_);
157 return $sth->rows || 0;
159 sub delete_tag_approval {
160 (@_) or return;
161 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
162 $sth->execute(shift);
163 return $sth->rows || 0;
165 sub delete_tag_row_by_id {
166 (@_) or return;
167 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
168 $sth->execute(shift);
169 return $sth->rows || 0;
171 sub delete_tag_rows_by_ids {
172 (@_) or return;
173 my $i=0;
174 foreach(@_) {
175 $i += delete_tag_row_by_id($_);
177 ($i == scalar(@_)) or
178 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
179 return $i;
182 sub get_tag_rows {
183 my $hash = shift || {};
184 my @ok_fields = TAG_FIELDS;
185 push @ok_fields, 'limit'; # push the limit! :)
186 my $wheres;
187 my $limit = "";
188 my @exe_args = ();
189 foreach my $key (keys %$hash) {
190 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
191 unless (length $key) {
192 carp "Empty argument key to get_tag_rows: ignoring!";
193 next;
195 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
196 carp "get_tag_rows received unreconized argument key '$key'.";
197 next;
199 if ($key eq 'limit') {
200 my $val = $hash->{$key};
201 unless ($val =~ /^(\d+,)?\d+$/) {
202 carp "Non-nuerical limit value '$val' ignored!";
203 next;
205 $limit = " LIMIT $val\n";
206 } else {
207 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
208 push @exe_args, $hash->{$key};
211 my $query = TAG_SELECT . ($wheres||'') . $limit;
212 $debug and print STDERR "get_tag_rows query:\n $query\n",
213 "get_tag_rows query args: ", join(',', @exe_args), "\n";
214 my $sth = C4::Context->dbh->prepare($query);
215 if (@exe_args) {
216 $sth->execute(@exe_args);
217 } else {
218 $sth->execute;
220 return $sth->fetchall_arrayref({});
223 sub get_tags { # i.e., from tags_index
224 my $hash = shift || {};
225 my @ok_fields = qw(term biblionumber weight limit sort approved);
226 my $wheres;
227 my $limit = "";
228 my $order = "";
229 my @exe_args = ();
230 foreach my $key (keys %$hash) {
231 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
232 unless (length $key) {
233 carp "Empty argument key to get_tags: ignoring!";
234 next;
236 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
237 carp "get_tags received unreconized argument key '$key'.";
238 next;
240 if ($key eq 'limit') {
241 my $val = $hash->{$key};
242 unless ($val =~ /^(\d+,)?\d+$/) {
243 carp "Non-nuerical limit value '$val' ignored!";
244 next;
246 $limit = " LIMIT $val\n";
247 } elsif ($key eq 'sort') {
248 foreach my $by (split /\,/, $hash->{$key}) {
249 unless (
250 $by =~ /^([-+])?(term)/ or
251 $by =~ /^([-+])?(biblionumber)/ or
252 $by =~ /^([-+])?(weight)/
254 carp "get_tags received illegal sort order '$by'";
255 next;
257 if ($order) {
258 $order .= ", ";
259 } else {
260 $order = " ORDER BY ";
262 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
265 } else {
266 my $whereval = $hash->{$key};
267 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
268 ($key eq 'approved') ? 'tags_approval.approved' : $key;
269 my $op = ($whereval =~ s/^(>=|<=)// or
270 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
271 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
272 push @exe_args, $whereval;
275 my $query = "
276 SELECT tags_index.term as term,biblionumber,weight,weight_total
277 FROM tags_index
278 LEFT JOIN tags_approval
279 ON tags_index.term = tags_approval.term
280 " . ($wheres||'') . $order . $limit;
281 $debug and print STDERR "get_tags query:\n $query\n",
282 "get_tags query args: ", join(',', @exe_args), "\n";
283 my $sth = C4::Context->dbh->prepare($query);
284 if (@exe_args) {
285 $sth->execute(@exe_args);
286 } else {
287 $sth->execute;
289 return $sth->fetchall_arrayref({});
292 sub get_approval_rows { # i.e., from tags_approval
293 my $hash = shift || {};
294 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
295 my $wheres;
296 my $limit = "";
297 my $order = "";
298 my @exe_args = ();
299 foreach my $key (keys %$hash) {
300 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
301 unless (length $key) {
302 carp "Empty argument key to get_approval_rows: ignoring!";
303 next;
305 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
306 carp "get_approval_rows received unreconized argument key '$key'.";
307 next;
309 if ($key eq 'limit') {
310 my $val = $hash->{$key};
311 unless ($val =~ /^(\d+,)?\d+$/) {
312 carp "Non-numerical limit value '$val' ignored!";
313 next;
315 $limit = " LIMIT $val\n";
316 } elsif ($key eq 'sort') {
317 foreach my $by (split /\,/, $hash->{$key}) {
318 unless (
319 $by =~ /^([-+])?(term)/ or
320 $by =~ /^([-+])?(biblionumber)/ or
321 $by =~ /^([-+])?(borrowernumber)/ or
322 $by =~ /^([-+])?(weight_total)/ or
323 $by =~ /^([-+])?(approved(_by)?)/ or
324 $by =~ /^([-+])?(date_approved)/
326 carp "get_approval_rows received illegal sort order '$by'";
327 next;
329 if ($order) {
330 $order .= ", ";
331 } else {
332 $order = " ORDER BY " unless $order;
334 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
337 } else {
338 my $whereval = $hash->{$key};
339 my $op = ($whereval =~ s/^(>=|<=)// or
340 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
341 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
342 push @exe_args, $whereval;
345 my $query = "
346 SELECT tags_approval.term AS term,
347 tags_approval.approved AS approved,
348 tags_approval.date_approved AS date_approved,
349 tags_approval.approved_by AS approved_by,
350 tags_approval.weight_total AS weight_total,
351 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
352 FROM tags_approval
353 LEFT JOIN borrowers
354 ON tags_approval.approved_by = borrowers.borrowernumber ";
355 $query .= ($wheres||'') . $order . $limit;
356 $debug and print STDERR "get_approval_rows query:\n $query\n",
357 "get_approval_rows query args: ", join(',', @exe_args), "\n";
358 my $sth = C4::Context->dbh->prepare($query);
359 if (@exe_args) {
360 $sth->execute(@exe_args);
361 } else {
362 $sth->execute;
364 return $sth->fetchall_arrayref({});
367 sub is_approved {
368 my $term = shift or return;
369 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
370 $sth->execute($term);
371 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
372 unless ($sth->rows) {
373 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
374 return 0;
376 return $sth->fetchrow;
379 sub get_tag_index {
380 my $term = shift or return;
381 my $sth;
382 if (@_) {
383 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
384 $sth->execute($term,shift);
385 } else {
386 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
387 $sth->execute($term);
389 return $sth->fetchrow_hashref;
392 sub whitelist {
393 my $operator = shift;
394 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
395 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
396 if ($ext_dict) {
397 foreach (@_) {
398 spellcheck($_) or next;
399 add_word_lc($_);
402 foreach (@_) {
403 my $aref = get_approval_rows({term=>$_});
404 if ($aref and scalar @$aref) {
405 mod_tag_approval($operator,$_,1);
406 } else {
407 add_tag_approval($_,$operator);
410 return scalar @_;
412 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
413 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
414 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
415 sub blacklist {
416 my $operator = shift;
417 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
418 foreach (@_) {
419 my $aref = get_approval_rows({term=>$_});
420 if ($aref and scalar @$aref) {
421 mod_tag_approval($operator,$_,-1);
422 } else {
423 add_tag_approval($_,$operator,-1);
426 return scalar @_;
428 sub add_filter {
429 my $operator = shift;
430 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
431 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
432 # my $sth = C4::Context->dbh->prepare($query);
433 return scalar @_;
435 sub remove_filter {
436 my $operator = shift;
437 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
438 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
439 # my $sth = C4::Context->dbh->prepare($query);
440 # $sth->execute($term);
441 return scalar @_;
444 sub add_tag_approval { # or disapproval
445 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
446 my $term = shift or return;
447 my $query = "SELECT * FROM tags_approval WHERE term = ?";
448 my $sth = C4::Context->dbh->prepare($query);
449 $sth->execute($term);
450 ($sth->rows) and return increment_weight_total($term);
451 my $operator = shift || 0;
452 my $approval = (@_ ? shift : 0); # default is unapproved
453 my @exe_args = ($term); # all 3 queries will use this argument
454 if ($operator) {
455 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
456 push @exe_args, $operator, $approval;
457 } elsif ($approval) {
458 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
459 push @exe_args, $approval;
460 } else {
461 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
463 $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
464 $sth = C4::Context->dbh->prepare($query);
465 $sth->execute(@exe_args);
466 return $sth->rows;
469 sub mod_tag_approval {
470 my $operator = shift;
471 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
472 my $term = shift or return;
473 my $approval = (scalar @_ ? shift : 1); # default is to approve
474 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
475 $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
476 my $sth = C4::Context->dbh->prepare($query);
477 $sth->execute($operator,$approval,$term);
480 sub add_tag_index {
481 my $term = shift or return;
482 my $biblionumber = shift or return;
483 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
484 my $sth = C4::Context->dbh->prepare($query);
485 $sth->execute($term,$biblionumber);
486 ($sth->rows) and return increment_weight($term,$biblionumber);
487 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
488 $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
489 $sth = C4::Context->dbh->prepare($query);
490 $sth->execute($term,$biblionumber);
491 return $sth->rows;
494 sub get_tag { # by tag_id
495 (@_) or return;
496 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
497 $sth->execute(shift);
498 return $sth->fetchrow_hashref;
501 sub increment_weights {
502 increment_weight(@_);
503 increment_weight_total(shift);
505 sub decrement_weights {
506 decrement_weight(@_);
507 decrement_weight_total(shift);
509 sub increment_weight_total {
510 _set_weight_total('weight_total+1',shift);
512 sub increment_weight {
513 _set_weight('weight+1',shift,shift);
515 sub decrement_weight_total {
516 _set_weight_total('weight_total-1',shift);
518 sub decrement_weight {
519 _set_weight('weight-1',shift,shift);
521 sub _set_weight_total {
522 my $sth = C4::Context->dbh->prepare("
523 UPDATE tags_approval
524 SET weight_total=" . (shift) . "
525 WHERE term=?
526 "); # note: CANNOT use "?" for weight_total (see the args above).
527 $sth->execute(shift); # just the term
529 sub _set_weight {
530 my $dbh = C4::Context->dbh;
531 my $sth = $dbh->prepare("
532 UPDATE tags_index
533 SET weight=" . (shift) . "
534 WHERE term=?
535 AND biblionumber=?
537 $sth->execute(@_);
540 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
541 my $biblionumber = shift or return;
542 my $term = shift or return;
543 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
544 $term =~ s/^\s+//;
545 $term =~ s/\s+$//;
546 ($term) or return; # must be more than whitespace
547 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
548 my $query = "INSERT INTO tags_all
549 (borrowernumber,biblionumber,term,date_created)
550 VALUES (?,?,?,NOW())";
551 $debug and print STDERR "add_tag query: $query\n",
552 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
553 if (scalar @$rows) {
554 $debug and carp "Duplicate tag detected. Tag not added.";
555 return;
557 # add to tags_all regardless of approaval
558 my $sth = C4::Context->dbh->prepare($query);
559 $sth->execute($borrowernumber,$biblionumber,$term);
561 # then
562 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
563 my $approver = shift;
564 $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
565 add_tag_approval($term,$approver,1);
566 add_tag_index($term,$biblionumber,$approver);
567 } elsif (is_approved($term) >= 1) {
568 $debug and print STDERR "term '$term' approved by whitelist\n";
569 add_tag_approval($term,0,1);
570 add_tag_index($term,$biblionumber,1);
571 } else {
572 $debug and print STDERR "term '$term' NOT approved (yet)\n";
573 add_tag_approval($term);
574 add_tag_index($term,$biblionumber);
578 # This takes a set of tags, as returned by C<get_approval_rows> and divides
579 # them up into a number of "strata" based on their weight. This is useful
580 # to display them in a number of different sizes.
582 # Usage:
583 # ($min, $max) = stratify_tags($strata, $tags);
584 # $stratum: the number of divisions you want
585 # $tags: the tags, as provided by get_approval_rows
586 # $min: the minimum stratum value
587 # $max: the maximum stratum value. This may be the same as $min if there
588 # is only one weight. Beware of divide by zeros.
589 # This will add a field to the tag called "stratum" containing the calculated
590 # value.
591 sub stratify_tags {
592 my ( $strata, $tags ) = @_;
593 return (0,0) if !@$tags;
594 my ( $min, $max );
595 foreach (@$tags) {
596 my $w = $_->{weight_total};
597 $min = $w if ( !defined($min) || $min > $w );
598 $max = $w if ( !defined($max) || $max < $w );
601 # normalise min to zero
602 $max = $max - $min;
603 my $orig_min = $min;
604 $min = 0;
606 # if min and max are the same, just make it 1
607 my $span = ( $strata - 1 ) / ( $max || 1 );
608 foreach (@$tags) {
609 my $w = $_->{weight_total};
610 $_->{stratum} = int( ( $w - $orig_min ) * $span );
612 return ( $min, $max );
616 __END__
618 =head2 add_tag(biblionumber,term[,borrowernumber])
620 =head3 TO DO: Add real perldoc
622 =cut
624 =head2 External Dictionary (Ispell) [Recommended]
626 An external dictionary can be used as a means of "pre-populating" and tracking
627 allowed terms based on the widely available Ispell dictionary. This can be the system
628 dictionary or a personal version, but in order to support whitelisting, it must be
629 editable to the process running Koha.
631 To enable, enter the absolute path to the ispell dictionary in the system
632 preference "TagsExternalDictionary".
634 Using external Ispell is recommended for both ease of use and performance. Note that any
635 language version of Ispell can be installed. It is also possible to modify the dictionary
636 at the command line to affect the desired content.
638 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
639 should build their own wordlist and recompile Ispell based on it. See man ispell for
640 instructions.
642 =head2 Table Structure
644 The tables used by tags are:
645 tags_all
646 tags_index
647 tags_approval
648 tags_blacklist
650 Your first thought may be that this looks a little complicated. It is, but only because
651 it has to be. I'll try to explain.
653 tags_all - This table would be all we really need if we didn't care about moderation or
654 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
655 though, it contains all the relevant info about a given tag:
656 tag_id - unique id number for it
657 borrowernumber - user that entered it
658 biblionumber - book record it is attached to
659 term - tag "term" itself
660 language - perhaps used later to influence weighting
661 date_created - date and time it was created
663 tags_approval - Since we need to provide moderation, this table is used to track it. If no
664 external dictionary is used, this table is the sole reference for approval and rejection.
665 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
666 This could be called an "approved terms" table. See above regarding the External Dictionary.
667 term - tag "term" itself
668 approved - Negative, 0 or positive if tag is rejected, pending or approved.
669 date_approved - date of last action
670 approved_by - staffer performing the last action
671 weight_total - total occurrence of term in any biblio by any users
673 tags_index - This table is for performance, because by far the most common operation will
674 be fetching tags for a list of search results. We will have a set of biblios, and we will
675 want ONLY their approved tags and overall weighting. While we could implement a query that
676 would traverse tags_all filtered against tags_approval, the performance implications of
677 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
678 term - approved term as it appears in tags_approval
679 biblionumber - book record it is attached to
680 weight - number of times tag applied by any user
682 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
683 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
684 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
685 if you do not understand their operation and interaction. It is quite easy to define too
686 simple or too complex a regexp and effectively block all terms. The blacklist operation is
687 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
688 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
689 understanding of regular expression syntax and performance.
691 So the best way to think about the different tables is that they are each tailored to a certain
692 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
693 the tag population can continue to grow even if a user (along with their corresponding
694 rows in tags_all) is removed.
696 =head2 Tricks
698 If you want to auto-populate some tags for debugging, do something like this:
700 mysql> select biblionumber from biblio where title LIKE "%Health%";
701 +--------------+
702 | biblionumber |
703 +--------------+
704 | 18 |
705 | 22 |
706 | 24 |
707 | 30 |
708 | 44 |
709 | 45 |
710 | 46 |
711 | 49 |
712 | 111 |
713 | 113 |
714 | 128 |
715 | 146 |
716 | 155 |
717 | 518 |
718 | 522 |
719 | 524 |
720 | 530 |
721 | 544 |
722 | 545 |
723 | 546 |
724 | 549 |
725 | 611 |
726 | 613 |
727 | 628 |
728 | 646 |
729 | 655 |
730 +--------------+
731 26 rows in set (0.00 sec)
733 Then, take those numbers and type/pipe them into this perl command line:
734 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
736 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.
738 =cut